1# Paranoid::Input -- Paranoid Input functions
2#
3# $Id: lib/Paranoid/Input.pm, 2.08 2020/12/31 12:10:06 acorliss Exp $
4#
5# This software is free software.  Similar to Perl, you can redistribute it
6# and/or modify it under the terms of either:
7#
8#   a)     the GNU General Public License
9#          <https://www.gnu.org/licenses/gpl-1.0.html> as published by the
10#          Free Software Foundation <http://www.fsf.org/>; either version 1
11#          <https://www.gnu.org/licenses/gpl-1.0.html>, or any later version
12#          <https://www.gnu.org/licenses/license-list.html#GNUGPL>, or
13#   b)     the Artistic License 2.0
14#          <https://opensource.org/licenses/Artistic-2.0>,
15#
16# subject to the following additional term:  No trademark rights to
17# "Paranoid" have been or are conveyed under any of the above licenses.
18# However, "Paranoid" may be used fairly to describe this unmodified
19# software, in good faith, but not as a trademark.
20#
21# (c) 2005 - 2020, Arthur Corliss (corliss@digitalmages.com)
22# (tm) 2008 - 2020, Paranoid Inc. (www.paranoid.com)
23#
24#####################################################################
25
26#####################################################################
27#
28# Environment definitions
29#
30#####################################################################
31
32package Paranoid::Input;
33
34use 5.008;
35
36use strict;
37use warnings;
38use vars qw($VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS);
39use base qw(Exporter);
40use Paranoid;
41use Paranoid::Debug qw(:all);
42use Carp;
43
44($VERSION) = ( q$Revision: 2.08 $ =~ /(\d+(?:\.\d+)+)/sm );
45
46@EXPORT      = qw(detaint stringMatch pchomp);
47@EXPORT_OK   = ( @EXPORT, qw(NEWLINE_REGEX) );
48%EXPORT_TAGS = ( all => [@EXPORT_OK], );
49
50use constant NEWLINE_REGEX => qr#(?:\15\12|\15|\12)#so;
51
52#####################################################################
53#
54# Module code follows
55#
56#####################################################################
57
58sub pchomp (;\[$@%]) {
59
60    # Purpose:  Platform neutral chomping
61    # Returns:  same as chomp
62    # Usage:    $n = pchomp($string);
63
64    my ($ref) = @_;
65    my $rv    = 0;
66    my $nl    = NEWLINE_REGEX;
67    my $e;
68
69    # If no args were passed work on $_
70    $ref = \$_ unless @_;
71
72    # slurp-mode bypass
73    return $rv unless defined $/;
74
75    if ( ref $ref eq 'SCALAR' and defined $$ref ) {
76        if ( $/ =~ /^$nl$/so ) {
77            $e = length $$ref;
78            $$ref =~ s/$nl$//so;
79            $rv = $e - length $$ref;
80        } else {
81            $rv = chomp $$ref;
82        }
83    } elsif ( ref $ref eq 'ARRAY' ) {
84        if ( $/ =~ /^$nl$/so ) {
85            foreach (@$ref) {
86                next unless defined;
87                $e = length $_;
88                $_ =~ s/$nl$//so;
89                $rv += $e - length $_;
90            }
91        } else {
92            $rv = chomp @$ref;
93        }
94    } elsif ( ref $ref eq 'HASH' ) {
95        if ( $/ =~ /^$nl$/so ) {
96            foreach ( keys %$ref ) {
97                next unless defined $$ref{$_};
98                $e = length $$ref{$_};
99                $$ref{$_} =~ s/$nl$//so;
100                $rv += $e - length $$ref{$_};
101            }
102        } else {
103            $rv = chomp %$ref;
104        }
105    }
106
107    return $rv;
108}
109
110our %regexes = (
111    alphabetic   => qr/[a-z]+/si,
112    alphanumeric => qr/[a-z0-9]+/si,
113    alphawhite   => qr/[a-z\s]+/si,
114    alnumwhite   => qr/[a-z0-9\s]+/si,
115    email        => qr/[a-z][\w\.\-]*\@(?:[a-z0-9][a-z0-9\-]*\.)*[a-z0-9]+/si,
116    filename     => qr#[/ \w\-\.:,@\+]+\[?#s,
117    fileglob     => qr#[/ \w\-\.:,@\+\*\?\{\}\[\]]+\[?#s,
118    hostname     => qr#(?:[a-z0-9][a-z0-9\-]*)(?:\.[a-z0-9][a-z0-9\-]*)*\.?#s,
119    ipv4addr =>
120        qr/(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])/s,
121    ipv4netaddr =>
122        qr#(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])/(?:(?:\d|[12]\d|3[0-2])|(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5]))#s,
123    ipv6addr => qr/
124        :(?::[abcdef\d]{1,4}){1,7}                 |
125        [abcdef\d]{1,4}(?:::?[abcdef\d]{1,4}){1,7} |
126        (?:[abcdef\d]{1,4}:){1,7}:
127        /six,
128    ipv6netaddr => qr#(?::(?::[abcdef\d]{1,4}){1,7}|
129        [abcdef\d]{1,4}(?:::?[abcdef\d]{1,4}){1,7} |
130        (?:[abcdef\d]{1,4}:){1,7}:)/(?:\d\d?|1(?:[01]\d|2[0-8]))#six,
131    login  => qr/[a-z][\w\.\-]*/si,
132    nometa => qr/[^\%\`\$\!\@]+/s,
133    number => qr/[+\-]?[0-9]+(?:\.[0-9]+)?/s,
134    'int'  => qr/[-+]?\d+/s,
135    uint   => qr/\d+/s,
136    float  => qr/[-+]?\d+(?:\.\d+)/s,
137    ufloat => qr/\d+(?:\.\d+)/s,
138    bin    => qr/[01]+/s,
139    octal  => qr/[0-7]+/s,
140    'hex'  => qr/[a-z0-9]+/si,
141    );
142
143sub detaint (\[$@%]$;\[$@%]) {
144
145    # Purpose:  Detaints and validates input in one call
146    # Returns:  True (1) if detainting was successful,
147    #           False (0) if there are any errors
148    # Usage:    $rv = detaint($input, $dataType, $detainted);
149    # Usage:    $rv = detaint(@input, $dataType, @detainted);
150    # Usage:    $rv = detaint(%input, $dataType, %detainted);
151
152    my $iref = shift;
153    my $type = shift;
154    my $oref = shift;
155    my $po   = defined $oref;
156    my $rv   = 0;
157    my ( $regex, $tmp );
158
159    pdebug( 'entering w/(%s)(%s)(%s)', PDLEVEL1, $iref, $type, $oref );
160    pIn();
161
162    # Make sure input and output data types match
163    croak "$iref and $oref aren't compatible data types"
164        unless !defined $oref
165            or ref $iref eq ref $oref;
166
167    # Warn on unknown regexes
168    if ( ref $type eq 'Regexp' ) {
169        $regex = $type;
170        $type  = 'custom';
171    } else {
172        if ( defined $type and exists $regexes{$type} ) {
173            $regex = $regexes{$type};
174        } else {
175            pdebug( 'unknown regex type requested: %s', PDLEVEL1, $type );
176        }
177    }
178
179    # Create a reference structure under $oref if none was passed
180    unless ( defined $oref ) {
181        $oref =
182              ref $iref eq 'ARRAY' ? []
183            : ref $iref eq 'HASH'  ? {}
184            :                        \$tmp;
185    }
186
187    # Make sure $oref is empty
188    if ( ref $oref eq 'SCALAR' ) {
189        $$oref = undef;
190    } elsif ( ref $oref eq 'ARRAY' ) {
191        @$oref = ();
192    } else {
193        %$oref = ();
194    }
195
196    # Start working
197    if ( defined $regex ) {
198        if ( ref $iref eq 'SCALAR' ) {
199            pdebug( 'evaluating (%s)', PDLEVEL2, $$iref );
200            ($$oref) = ( $$iref =~ /^($regex)$/s )
201                if defined $$iref;
202            $rv = defined $$oref;
203        } elsif ( ref $iref eq 'ARRAY' ) {
204            if ( scalar @$iref ) {
205                $rv = 1;
206                foreach (@$iref) {
207                    pdebug( 'evaluating (%s)', PDLEVEL2, $_ );
208                    ( $$oref[ $#{$oref} + 1 ] ) =
209                        defined $_ ? m/^($regex)$/s : (undef);
210                    $rv = 0 unless defined $$oref[-1];
211                    pdebug( 'got (%s)', PDLEVEL2, $$oref[-1] );
212                }
213            }
214            $rv = !scalar grep { !defined } @$oref;
215        } else {
216            if ( scalar keys %$iref ) {
217                $rv = 1;
218                foreach ( keys %$iref ) {
219                    pdebug( 'evaluating (%s)', PDLEVEL2, $$iref{$_} );
220                    ( $$oref{$_} ) =
221                        defined $$iref{$_}
222                        ? ( $$iref{$_} =~ m/^($regex)$/s )
223                        : undef;
224                    $rv = 0 unless defined $$oref{$_};
225                }
226            }
227        }
228    }
229
230    # Copy everything back to $iref if needed
231    unless ($po) {
232        if ( ref $iref eq 'SCALAR' ) {
233            $$iref = $$oref;
234        } elsif ( ref $iref eq 'ARRAY' ) {
235            @$iref = @$oref;
236        } else {
237            %$iref = %$oref;
238        }
239    }
240
241    pOut();
242    pdebug( 'leaving w/rv: %s', PDLEVEL1, $rv );
243
244    return $rv;
245}
246
247sub stringMatch ($@) {
248
249    # Purpose:  Looks for occurrences of strings and/or regexes in the passed
250    #           input
251    # Returns:  True (1) any of the strings/regexes match,
252    #           False (0), otherwise
253    # Usage:    $rv = stringMatch($input, @words);
254
255    my $input = shift;
256    my @match = splice @_;
257    my $rv    = 0;
258    my @regex;
259
260    pdebug( 'entering w/(%s)(%s)', PDLEVEL1, $input, @match );
261    pIn();
262
263    if ( defined $input and @match ) {
264
265        # Populate @regex w/regexes
266        @regex = grep { defined $_ && ref $_ eq 'Regexp' } @match;
267
268        # Convert remaining strings to regexes
269        foreach ( grep { defined $_ && ref $_ ne 'Regexp' } @match ) {
270            push @regex, m#^/(.+)/$#s ? qr#$1#si : qr#\Q$_\E#si;
271        }
272
273        # Start comparisons
274        study $input;
275        foreach my $r (@regex) {
276            if ( $input =~ /$r/si ) {
277                $rv = 1;
278                last;
279            }
280        }
281    }
282
283    pOut();
284    pdebug( 'leaving w/rv: %s', PDLEVEL1, $rv );
285
286    return $rv;
287}
288
2891;
290
291__END__
292
293=head1 NAME
294
295Paranoid::Input - Paranoid input functions
296
297=head1 VERSION
298
299$Id: lib/Paranoid/Input.pm, 2.08 2020/12/31 12:10:06 acorliss Exp $
300
301=head1 SYNOPSIS
302
303  use Paranoid::Input;
304
305  $rv = detaint($userInput, "login", $detainted);
306  $rv = detaint(@userInput, "login", @detainted);
307  $rv = detaint(%userInput, "login", %detainted);
308
309  $rv = detaint($input, qr#\w+\s+\d+#s);
310  $rv = detaint(@input, qr#\w+\s+\d+#s);
311  $rv = detaint(%input, qr#\w+\s+\d+#s);
312
313  $rv = stringMatch($input, @strings);
314
315  $Paranoid::Input::regexes{'new_type"} = qr/\w\s+\d+/s;
316
317  $rv = pchomp($lines);
318  $rv = pchomp(@lines);
319  $rv = pchomp(%dict);
320
321  # Chomp $_
322  $rv = pchomp();
323
324=head1 DESCRIPTION
325
326This provides some generic functions for working with text-based input.  The
327main benefirst of this module is a relatively simple way of validating and
328detainting formatted text and performing platform-agnostic chomps.
329
330=head1 IMPORT LISTS
331
332This module exports the following symbols by default:
333
334    detaint stringMatch pchomp
335
336The following specialized import lists also exist:
337
338    List        Members
339    --------------------------------------------------------
340    all         @defaults NEWLINE_REGEX
341
342=head1 VARIABLES
343
344=head2 NEWLINE_REGEX
345
346This returns regular expression that matches against DOS, UNIX, and legacy Mac
347line terminators.  This is the regular expression used internally by L<pchomp>
348to perform platform-agnostic chomps.
349
350This is only exported if explicity requested, or under an import target of
351B<:all>.
352
353=head1 SUBROUTINES/METHODS
354
355=head2 detaint
356
357  $rv = detaint($userInput, "login", $val);
358
359This function populates the passed data object with the detainted input from the
360first argument.  The second argument specifies the type of data in the first
361argument, and is used to validate the input before detainting.  If you don't
362want to use one of the built-in regular expressions you can, instead, pass
363your own custom regular expression.
364
365The third argument is optional, but if used, must match the first argument's
366data type.  If it is omitted all detainted values are used to overwrite the
367contents of the first argument.  If detaint fails for any reason B<undef> is
368used instead.
369
370If the first argument fails to match against these regular expressions the
371function will return 0.  If the string passed is either undefined or a
372zero-length string it will also return 0.  And finally, if you attempt to use
373an unknown (or unregistered) data type it will also return 0, and log an error
374message in B<Paranoid::ERROR>.
375
376The following regular expressions are known by name:
377
378    Name            Description
379    =========================================================
380    alphabetic      Alphabetic characters
381    alphanumeric    Alphabetic/numeric characters
382    alphawhite      Alphabetic/whitespace characters
383    alnumwhite      Alphabetic/numeric/whitespace characters
384    email           RFC 822 Email address format
385    filename        Essentially no-metacharacters
386    fileglob        Same as filename, but with glob meta-
387                    character support
388    hostname        Alphanumeric/hyphenated host names
389    ipv4addr        IPv4 address
390    ipv4netaddr     IPv4 network address (CIDR/dotted quad)
391    ipv6addr        IPv6 address
392    ipv6netaddr     IPv6 network address (CIDR)
393    login           UNIX login format
394    nometa          Everything but meta-characters
395    number          Integer/float/signed/unsigned
396    int             Integer/signed/unsigned
397    uint            Integer/unsigned
398    float           Float/signed/unsigned
399    ufloat          Float/unsigned
400    bin             binary
401    octal           octal
402    hex             hexadecimal
403
404=head2 stringMatch
405
406  $rv = stringMatch($input, @strings);
407
408This function does a multiline case insensitive regex match against the
409input for every string passed for matching.  This does safe quoted matches
410(\Q$string\E) for all the strings, unless the string is a perl Regexp
411(defined with qr//) or begins and ends with /.
412
413B<NOTE>: this performs a study in hopes that for a large number of regexes
414will be performed faster.  This may not always be the case.
415
416=head2 pchomp
417
418    $rv = pchomp(@lines);
419
420B<pchomp> is meant to be a drop-in replacement for chomp, primarily where you
421want it to work as a platform-agnostic line chomper.  If I<$/> is altered in
422any manner (slurp mode, fixed record length, etc.) it will assume that's not
423important and automatically call B<chomp> instead.  It should, then, be safe
424to be called in all instances in which you'd call B<chomp> itself.
425
426In a nutshell, this function attempts to avoid the assumption that B<chomp>
427makes in that the latter assumes that all input it works upon was authored on
428the same system, using the same input record separators.  Using B<pchomp> in
429lieu of B<chomp> will allow you to treat DOS, UNIX, and Mac-authored files
430identically with no additional coding.
431
432Because it is assumed that B<pchomp> will be used in potentially high
433frequency scenarios no B<pdebug> calls are made within it to avoid exercising
434the stack any more than necessary.  It is hoped that the relative simplicity
435of the subroutine should make debug use unnecessary.
436
437=head1 DEPENDENCIES
438
439=over
440
441=item o
442
443L<Carp>
444
445=item o
446
447L<Paranoid>
448
449=item o
450
451L<Paranoid::Debug>
452
453=back
454
455=head1 BUGS AND LIMITATIONS
456
457=head1 AUTHOR
458
459Arthur Corliss (corliss@digitalmages.com)
460
461=head1 LICENSE AND COPYRIGHT
462
463This software is free software.  Similar to Perl, you can redistribute it
464and/or modify it under the terms of either:
465
466  a)     the GNU General Public License
467         <https://www.gnu.org/licenses/gpl-1.0.html> as published by the
468         Free Software Foundation <http://www.fsf.org/>; either version 1
469         <https://www.gnu.org/licenses/gpl-1.0.html>, or any later version
470         <https://www.gnu.org/licenses/license-list.html#GNUGPL>, or
471  b)     the Artistic License 2.0
472         <https://opensource.org/licenses/Artistic-2.0>,
473
474subject to the following additional term:  No trademark rights to
475"Paranoid" have been or are conveyed under any of the above licenses.
476However, "Paranoid" may be used fairly to describe this unmodified
477software, in good faith, but not as a trademark.
478
479(c) 2005 - 2020, Arthur Corliss (corliss@digitalmages.com)
480(tm) 2008 - 2020, Paranoid Inc. (www.paranoid.com)
481
482