xref: /openbsd/gnu/usr.bin/perl/ext/Opcode/Opcode.pm (revision d89ec533)
1package Opcode;
2
3use 5.006_001;
4
5use strict;
6
7our($VERSION, @ISA, @EXPORT_OK);
8
9$VERSION = "1.48";
10
11use Carp;
12use Exporter ();
13use XSLoader;
14
15BEGIN {
16    @ISA = qw(Exporter);
17    @EXPORT_OK = qw(
18	opset ops_to_opset
19	opset_to_ops opset_to_hex invert_opset
20	empty_opset full_opset
21	opdesc opcodes opmask define_optag
22	opmask_add verify_opset opdump
23    );
24}
25
26sub opset (;@);
27sub opset_to_hex ($);
28sub opdump (;$);
29use subs @EXPORT_OK;
30
31XSLoader::load();
32
33_init_optags();
34
35sub ops_to_opset { opset @_ }	# alias for old name
36
37sub opset_to_hex ($) {
38    return "(invalid opset)" unless verify_opset($_[0]);
39    unpack("h*",$_[0]);
40}
41
42sub opdump (;$) {
43	my $pat = shift;
44    # handy utility: perl -MOpcode=opdump -e 'opdump File'
45    foreach(opset_to_ops(full_opset)) {
46        my $op = sprintf "  %12s  %s\n", $_, opdesc($_);
47		next if defined $pat and $op !~ m/$pat/i;
48		print $op;
49    }
50}
51
52
53
54sub _init_optags {
55    my(%all, %seen);
56    @all{opset_to_ops(full_opset)} = (); # keys only
57
58    local($_);
59    local($/) = "\n=cut"; # skip to optags definition section
60    <DATA>;
61    $/ = "\n=";		# now read in 'pod section' chunks
62    while(<DATA>) {
63	next unless m/^item\s+(:\w+)/;
64	my $tag = $1;
65
66	# Split into lines, keep only indented lines
67	my @lines = grep { m/^\s/    } split(/\n/);
68	foreach (@lines) { s/(?:\t|--).*//  } # delete comments
69	my @ops   = map  { split ' ' } @lines; # get op words
70
71	foreach(@ops) {
72	    warn "$tag - $_ already tagged in $seen{$_}\n" if $seen{$_};
73	    $seen{$_} = $tag;
74	    delete $all{$_};
75	}
76	# opset will croak on invalid names
77	define_optag($tag, opset(@ops));
78    }
79    close(DATA);
80    warn "Untagged opnames: ".join(' ',keys %all)."\n" if %all;
81}
82
83
841;
85
86__DATA__
87
88=head1 NAME
89
90Opcode - Disable named opcodes when compiling perl code
91
92=head1 SYNOPSIS
93
94  use Opcode;
95
96
97=head1 DESCRIPTION
98
99Perl code is always compiled into an internal format before execution.
100
101Evaluating perl code (e.g. via "eval" or "do 'file'") causes
102the code to be compiled into an internal format and then,
103provided there was no error in the compilation, executed.
104The internal format is based on many distinct I<opcodes>.
105
106By default no opmask is in effect and any code can be compiled.
107
108The Opcode module allow you to define an I<operator mask> to be in
109effect when perl I<next> compiles any code.  Attempting to compile code
110which contains a masked opcode will cause the compilation to fail
111with an error. The code will not be executed.
112
113=head1 NOTE
114
115The Opcode module is not usually used directly. See the ops pragma and
116Safe modules for more typical uses.
117
118=head1 WARNING
119
120The Opcode module does not implement an effective sandbox for
121evaluating untrusted code with the perl interpreter.
122
123Bugs in the perl interpreter that could be abused to bypass
124Opcode restrictions are not treated as vulnerabilities. See
125L<perlsecpolicy> for additional information.
126
127The authors make B<no warranty>, implied or otherwise, about the
128suitability of this software for safety or security purposes.
129
130The authors shall not in any case be liable for special, incidental,
131consequential, indirect or other similar damages arising from the use
132of this software.
133
134Your mileage will vary. If in any doubt B<do not use it>.
135
136
137=head1 Operator Names and Operator Lists
138
139The canonical list of operator names is the contents of the array
140PL_op_name defined and initialised in file F<opcode.h> of the Perl
141source distribution (and installed into the perl library).
142
143Each operator has both a terse name (its opname) and a more verbose or
144recognisable descriptive name. The opdesc function can be used to
145return a list of descriptions for a list of operators.
146
147Many of the functions and methods listed below take a list of
148operators as parameters. Most operator lists can be made up of several
149types of element. Each element can be one of
150
151=over 8
152
153=item an operator name (opname)
154
155Operator names are typically small lowercase words like enterloop,
156leaveloop, last, next, redo etc. Sometimes they are rather cryptic
157like gv2cv, i_ncmp and ftsvtx.
158
159=item an operator tag name (optag)
160
161Operator tags can be used to refer to groups (or sets) of operators.
162Tag names always begin with a colon. The Opcode module defines several
163optags and the user can define others using the define_optag function.
164
165=item a negated opname or optag
166
167An opname or optag can be prefixed with an exclamation mark, e.g., !mkdir.
168Negating an opname or optag means remove the corresponding ops from the
169accumulated set of ops at that point.
170
171=item an operator set (opset)
172
173An I<opset> as a binary string of approximately 44 bytes which holds a
174set or zero or more operators.
175
176The opset and opset_to_ops functions can be used to convert from
177a list of operators to an opset and I<vice versa>.
178
179Wherever a list of operators can be given you can use one or more opsets.
180See also Manipulating Opsets below.
181
182=back
183
184
185=head1 Opcode Functions
186
187The Opcode package contains functions for manipulating operator names
188tags and sets. All are available for export by the package.
189
190=over 8
191
192=item opcodes
193
194In a scalar context opcodes returns the number of opcodes in this
195version of perl (around 350 for perl-5.7.0).
196
197In a list context it returns a list of all the operator names.
198(Not yet implemented, use @names = opset_to_ops(full_opset).)
199
200=item opset (OP, ...)
201
202Returns an opset containing the listed operators.
203
204=item opset_to_ops (OPSET)
205
206Returns a list of operator names corresponding to those operators in
207the set.
208
209=item opset_to_hex (OPSET)
210
211Returns a string representation of an opset. Can be handy for debugging.
212
213=item full_opset
214
215Returns an opset which includes all operators.
216
217=item empty_opset
218
219Returns an opset which contains no operators.
220
221=item invert_opset (OPSET)
222
223Returns an opset which is the inverse set of the one supplied.
224
225=item verify_opset (OPSET, ...)
226
227Returns true if the supplied opset looks like a valid opset (is the
228right length etc) otherwise it returns false. If an optional second
229parameter is true then verify_opset will croak on an invalid opset
230instead of returning false.
231
232Most of the other Opcode functions call verify_opset automatically
233and will croak if given an invalid opset.
234
235=item define_optag (OPTAG, OPSET)
236
237Define OPTAG as a symbolic name for OPSET. Optag names always start
238with a colon C<:>.
239
240The optag name used must not be defined already (define_optag will
241croak if it is already defined). Optag names are global to the perl
242process and optag definitions cannot be altered or deleted once
243defined.
244
245It is strongly recommended that applications using Opcode should use a
246leading capital letter on their tag names since lowercase names are
247reserved for use by the Opcode module. If using Opcode within a module
248you should prefix your tags names with the name of your module to
249ensure uniqueness and thus avoid clashes with other modules.
250
251=item opmask_add (OPSET)
252
253Adds the supplied opset to the current opmask. Note that there is
254currently I<no> mechanism for unmasking ops once they have been masked.
255This is intentional.
256
257=item opmask
258
259Returns an opset corresponding to the current opmask.
260
261=item opdesc (OP, ...)
262
263This takes a list of operator names and returns the corresponding list
264of operator descriptions.
265
266=item opdump (PAT)
267
268Dumps to STDOUT a two column list of op names and op descriptions.
269If an optional pattern is given then only lines which match the
270(case insensitive) pattern will be output.
271
272It's designed to be used as a handy command line utility:
273
274	perl -MOpcode=opdump -e opdump
275	perl -MOpcode=opdump -e 'opdump Eval'
276
277=back
278
279=head1 Manipulating Opsets
280
281Opsets may be manipulated using the perl bit vector operators & (and), | (or),
282^ (xor) and ~ (negate/invert).
283
284However you should never rely on the numerical position of any opcode
285within the opset. In other words both sides of a bit vector operator
286should be opsets returned from Opcode functions.
287
288Also, since the number of opcodes in your current version of perl might
289not be an exact multiple of eight, there may be unused bits in the last
290byte of an upset. This should not cause any problems (Opcode functions
291ignore those extra bits) but it does mean that using the ~ operator
292will typically not produce the same 'physical' opset 'string' as the
293invert_opset function.
294
295
296=head1 TO DO (maybe)
297
298    $bool = opset_eq($opset1, $opset2)	true if opsets are logically
299					equivalent
300    $yes = opset_can($opset, @ops)	true if $opset has all @ops set
301
302    @diff = opset_diff($opset1, $opset2) => ('foo', '!bar', ...)
303
304=cut
305
306# the =cut above is used by _init_optags() to get here quickly
307
308=head1 Predefined Opcode Tags
309
310=over 5
311
312=item :base_core
313
314    null stub scalar pushmark wantarray const defined undef
315
316    rv2sv sassign
317
318    rv2av aassign aelem aelemfast aelemfast_lex aslice kvaslice
319    av2arylen
320
321    rv2hv helem hslice kvhslice each values keys exists delete
322    aeach akeys avalues multideref argelem argdefelem argcheck
323
324    preinc i_preinc predec i_predec postinc i_postinc
325    postdec i_postdec int hex oct abs pow multiply i_multiply
326    divide i_divide modulo i_modulo add i_add subtract i_subtract
327
328    left_shift right_shift bit_and bit_xor bit_or nbit_and
329    nbit_xor nbit_or sbit_and sbit_xor sbit_or negate i_negate not
330    complement ncomplement scomplement
331
332    lt i_lt gt i_gt le i_le ge i_ge eq i_eq ne i_ne ncmp i_ncmp
333    slt sgt sle sge seq sne scmp
334    isa
335
336    substr vec stringify study pos length index rindex ord chr
337
338    ucfirst lcfirst uc lc fc quotemeta trans transr chop schop
339    chomp schomp
340
341    match split qr
342
343    list lslice splice push pop shift unshift reverse
344
345    cond_expr flip flop andassign orassign dorassign and or dor xor
346
347    warn die lineseq nextstate scope enter leave
348
349    rv2cv anoncode prototype coreargs avhvswitch anonconst
350
351    entersub leavesub leavesublv return method method_named
352    method_super method_redir method_redir_super
353     -- XXX loops via recursion?
354
355    cmpchain_and cmpchain_dup
356
357    leaveeval -- needed for Safe to operate, is safe
358		 without entereval
359
360=item :base_mem
361
362These memory related ops are not included in :base_core because they
363can easily be used to implement a resource attack (e.g., consume all
364available memory).
365
366    concat multiconcat repeat join range
367
368    anonlist anonhash
369
370Note that despite the existence of this optag a memory resource attack
371may still be possible using only :base_core ops.
372
373Disabling these ops is a I<very> heavy handed way to attempt to prevent
374a memory resource attack. It's probable that a specific memory limit
375mechanism will be added to perl in the near future.
376
377=item :base_loop
378
379These loop ops are not included in :base_core because they can easily be
380used to implement a resource attack (e.g., consume all available CPU time).
381
382    grepstart grepwhile
383    mapstart mapwhile
384    enteriter iter
385    enterloop leaveloop unstack
386    last next redo
387    goto
388
389=item :base_io
390
391These ops enable I<filehandle> (rather than filename) based input and
392output. These are safe on the assumption that only pre-existing
393filehandles are available for use.  Usually, to create new filehandles
394other ops such as open would need to be enabled, if you don't take into
395account the magical open of ARGV.
396
397    readline rcatline getc read
398
399    formline enterwrite leavewrite
400
401    print say sysread syswrite send recv
402
403    eof tell seek sysseek
404
405    readdir telldir seekdir rewinddir
406
407=item :base_orig
408
409These are a hotchpotch of opcodes still waiting to be considered
410
411    gvsv gv gelem
412
413    padsv padav padhv padcv padany padrange introcv clonecv
414
415    once
416
417    rv2gv refgen srefgen ref refassign lvref lvrefslice lvavref
418
419    bless -- could be used to change ownership of objects
420	     (reblessing)
421
422     regcmaybe regcreset regcomp subst substcont
423
424    sprintf prtf -- can core dump
425
426    crypt
427
428    tie untie
429
430    dbmopen dbmclose
431    sselect select
432    pipe_op sockpair
433
434    getppid getpgrp setpgrp getpriority setpriority
435    localtime gmtime
436
437    entertry leavetry -- can be used to 'hide' fatal errors
438
439    entergiven leavegiven
440    enterwhen leavewhen
441    break continue
442    smartmatch
443
444    custom -- where should this go
445
446=item :base_math
447
448These ops are not included in :base_core because of the risk of them being
449used to generate floating point exceptions (which would have to be caught
450using a $SIG{FPE} handler).
451
452    atan2 sin cos exp log sqrt
453
454These ops are not included in :base_core because they have an effect
455beyond the scope of the compartment.
456
457    rand srand
458
459=item :base_thread
460
461These ops are related to multi-threading.
462
463    lock
464
465=item :default
466
467A handy tag name for a I<reasonable> default set of ops.  (The current ops
468allowed are unstable while development continues. It will change.)
469
470    :base_core :base_mem :base_loop :base_orig :base_thread
471
472This list used to contain :base_io prior to Opcode 1.07.
473
474If safety matters to you (and why else would you be using the Opcode module?)
475then you should not rely on the definition of this, or indeed any other, optag!
476
477=item :filesys_read
478
479    stat lstat readlink
480
481    ftatime ftblk ftchr ftctime ftdir fteexec fteowned
482    fteread ftewrite ftfile ftis ftlink ftmtime ftpipe
483    ftrexec ftrowned ftrread ftsgid ftsize ftsock ftsuid
484    fttty ftzero ftrwrite ftsvtx
485
486    fttext ftbinary
487
488    fileno
489
490=item :sys_db
491
492    ghbyname ghbyaddr ghostent shostent ehostent      -- hosts
493    gnbyname gnbyaddr gnetent snetent enetent         -- networks
494    gpbyname gpbynumber gprotoent sprotoent eprotoent -- protocols
495    gsbyname gsbyport gservent sservent eservent      -- services
496
497    gpwnam gpwuid gpwent spwent epwent getlogin       -- users
498    ggrnam ggrgid ggrent sgrent egrent                -- groups
499
500=item :browse
501
502A handy tag name for a I<reasonable> default set of ops beyond the
503:default optag.  Like :default (and indeed all the other optags) its
504current definition is unstable while development continues. It will change.
505
506The :browse tag represents the next step beyond :default. It is a
507superset of the :default ops and adds :filesys_read the :sys_db.
508The intent being that scripts can access more (possibly sensitive)
509information about your system but not be able to change it.
510
511    :default :filesys_read :sys_db
512
513=item :filesys_open
514
515    sysopen open close
516    umask binmode
517
518    open_dir closedir -- other dir ops are in :base_io
519
520=item :filesys_write
521
522    link unlink rename symlink truncate
523
524    mkdir rmdir
525
526    utime chmod chown
527
528    fcntl -- not strictly filesys related, but possibly as
529	     dangerous?
530
531=item :subprocess
532
533    backtick system
534
535    fork
536
537    wait waitpid
538
539    glob -- access to Cshell via <`rm *`>
540
541=item :ownprocess
542
543    exec exit kill
544
545    time tms -- could be used for timing attacks (paranoid?)
546
547=item :others
548
549This tag holds groups of assorted specialist opcodes that don't warrant
550having optags defined for them.
551
552SystemV Interprocess Communications:
553
554    msgctl msgget msgrcv msgsnd
555
556    semctl semget semop
557
558    shmctl shmget shmread shmwrite
559
560=item :load
561
562This tag holds opcodes related to loading modules and getting information
563about calling environment and args.
564
565    require dofile
566    caller runcv
567
568=item :still_to_be_decided
569
570    chdir
571    flock ioctl
572
573    socket getpeername ssockopt
574    bind connect listen accept shutdown gsockopt getsockname
575
576    sleep alarm -- changes global timer state and signal handling
577    sort -- assorted problems including core dumps
578    tied -- can be used to access object implementing a tie
579    pack unpack -- can be used to create/use memory pointers
580
581    hintseval -- constant op holding eval hints
582
583    entereval -- can be used to hide code from initial compile
584
585    reset
586
587    dbstate -- perl -d version of nextstate(ment) opcode
588
589=item :dangerous
590
591This tag is simply a bucket for opcodes that are unlikely to be used via
592a tag name but need to be tagged for completeness and documentation.
593
594    syscall dump chroot
595
596=back
597
598=head1 SEE ALSO
599
600L<ops> -- perl pragma interface to Opcode module.
601
602L<Safe> -- Opcode and namespace limited execution compartments
603
604=head1 AUTHORS
605
606Originally designed and implemented by Malcolm Beattie,
607mbeattie@sable.ox.ac.uk as part of Safe version 1.
608
609Split out from Safe module version 1, named opcode tags and other
610changes added by Tim Bunce.
611
612=cut
613
614