1# Paranoid::Input -- Paranoid Input functions 2# 3# $Id: lib/Paranoid/Input.pm, 2.08 2020/12/31 12:10:06 acorliss Exp $ 4# 5# This software is free software. Similar to Perl, you can redistribute it 6# and/or modify it under the terms of either: 7# 8# a) the GNU General Public License 9# <https://www.gnu.org/licenses/gpl-1.0.html> as published by the 10# Free Software Foundation <http://www.fsf.org/>; either version 1 11# <https://www.gnu.org/licenses/gpl-1.0.html>, or any later version 12# <https://www.gnu.org/licenses/license-list.html#GNUGPL>, or 13# b) the Artistic License 2.0 14# <https://opensource.org/licenses/Artistic-2.0>, 15# 16# subject to the following additional term: No trademark rights to 17# "Paranoid" have been or are conveyed under any of the above licenses. 18# However, "Paranoid" may be used fairly to describe this unmodified 19# software, in good faith, but not as a trademark. 20# 21# (c) 2005 - 2020, Arthur Corliss (corliss@digitalmages.com) 22# (tm) 2008 - 2020, Paranoid Inc. (www.paranoid.com) 23# 24##################################################################### 25 26##################################################################### 27# 28# Environment definitions 29# 30##################################################################### 31 32package Paranoid::Input; 33 34use 5.008; 35 36use strict; 37use warnings; 38use vars qw($VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS); 39use base qw(Exporter); 40use Paranoid; 41use Paranoid::Debug qw(:all); 42use Carp; 43 44($VERSION) = ( q$Revision: 2.08 $ =~ /(\d+(?:\.\d+)+)/sm ); 45 46@EXPORT = qw(detaint stringMatch pchomp); 47@EXPORT_OK = ( @EXPORT, qw(NEWLINE_REGEX) ); 48%EXPORT_TAGS = ( all => [@EXPORT_OK], ); 49 50use constant NEWLINE_REGEX => qr#(?:\15\12|\15|\12)#so; 51 52##################################################################### 53# 54# Module code follows 55# 56##################################################################### 57 58sub pchomp (;\[$@%]) { 59 60 # Purpose: Platform neutral chomping 61 # Returns: same as chomp 62 # Usage: $n = pchomp($string); 63 64 my ($ref) = @_; 65 my $rv = 0; 66 my $nl = NEWLINE_REGEX; 67 my $e; 68 69 # If no args were passed work on $_ 70 $ref = \$_ unless @_; 71 72 # slurp-mode bypass 73 return $rv unless defined $/; 74 75 if ( ref $ref eq 'SCALAR' and defined $$ref ) { 76 if ( $/ =~ /^$nl$/so ) { 77 $e = length $$ref; 78 $$ref =~ s/$nl$//so; 79 $rv = $e - length $$ref; 80 } else { 81 $rv = chomp $$ref; 82 } 83 } elsif ( ref $ref eq 'ARRAY' ) { 84 if ( $/ =~ /^$nl$/so ) { 85 foreach (@$ref) { 86 next unless defined; 87 $e = length $_; 88 $_ =~ s/$nl$//so; 89 $rv += $e - length $_; 90 } 91 } else { 92 $rv = chomp @$ref; 93 } 94 } elsif ( ref $ref eq 'HASH' ) { 95 if ( $/ =~ /^$nl$/so ) { 96 foreach ( keys %$ref ) { 97 next unless defined $$ref{$_}; 98 $e = length $$ref{$_}; 99 $$ref{$_} =~ s/$nl$//so; 100 $rv += $e - length $$ref{$_}; 101 } 102 } else { 103 $rv = chomp %$ref; 104 } 105 } 106 107 return $rv; 108} 109 110our %regexes = ( 111 alphabetic => qr/[a-z]+/si, 112 alphanumeric => qr/[a-z0-9]+/si, 113 alphawhite => qr/[a-z\s]+/si, 114 alnumwhite => qr/[a-z0-9\s]+/si, 115 email => qr/[a-z][\w\.\-]*\@(?:[a-z0-9][a-z0-9\-]*\.)*[a-z0-9]+/si, 116 filename => qr#[/ \w\-\.:,@\+]+\[?#s, 117 fileglob => qr#[/ \w\-\.:,@\+\*\?\{\}\[\]]+\[?#s, 118 hostname => qr#(?:[a-z0-9][a-z0-9\-]*)(?:\.[a-z0-9][a-z0-9\-]*)*\.?#s, 119 ipv4addr => 120 qr/(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])/s, 121 ipv4netaddr => 122 qr#(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])/(?:(?:\d|[12]\d|3[0-2])|(?:(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5])\.){3}(?:\d\d?|1\d\d|2[0-4][0-9]|25[0-5]))#s, 123 ipv6addr => qr/ 124 :(?::[abcdef\d]{1,4}){1,7} | 125 [abcdef\d]{1,4}(?:::?[abcdef\d]{1,4}){1,7} | 126 (?:[abcdef\d]{1,4}:){1,7}: 127 /six, 128 ipv6netaddr => qr#(?::(?::[abcdef\d]{1,4}){1,7}| 129 [abcdef\d]{1,4}(?:::?[abcdef\d]{1,4}){1,7} | 130 (?:[abcdef\d]{1,4}:){1,7}:)/(?:\d\d?|1(?:[01]\d|2[0-8]))#six, 131 login => qr/[a-z][\w\.\-]*/si, 132 nometa => qr/[^\%\`\$\!\@]+/s, 133 number => qr/[+\-]?[0-9]+(?:\.[0-9]+)?/s, 134 'int' => qr/[-+]?\d+/s, 135 uint => qr/\d+/s, 136 float => qr/[-+]?\d+(?:\.\d+)/s, 137 ufloat => qr/\d+(?:\.\d+)/s, 138 bin => qr/[01]+/s, 139 octal => qr/[0-7]+/s, 140 'hex' => qr/[a-z0-9]+/si, 141 ); 142 143sub detaint (\[$@%]$;\[$@%]) { 144 145 # Purpose: Detaints and validates input in one call 146 # Returns: True (1) if detainting was successful, 147 # False (0) if there are any errors 148 # Usage: $rv = detaint($input, $dataType, $detainted); 149 # Usage: $rv = detaint(@input, $dataType, @detainted); 150 # Usage: $rv = detaint(%input, $dataType, %detainted); 151 152 my $iref = shift; 153 my $type = shift; 154 my $oref = shift; 155 my $po = defined $oref; 156 my $rv = 0; 157 my ( $regex, $tmp ); 158 159 pdebug( 'entering w/(%s)(%s)(%s)', PDLEVEL1, $iref, $type, $oref ); 160 pIn(); 161 162 # Make sure input and output data types match 163 croak "$iref and $oref aren't compatible data types" 164 unless !defined $oref 165 or ref $iref eq ref $oref; 166 167 # Warn on unknown regexes 168 if ( ref $type eq 'Regexp' ) { 169 $regex = $type; 170 $type = 'custom'; 171 } else { 172 if ( defined $type and exists $regexes{$type} ) { 173 $regex = $regexes{$type}; 174 } else { 175 pdebug( 'unknown regex type requested: %s', PDLEVEL1, $type ); 176 } 177 } 178 179 # Create a reference structure under $oref if none was passed 180 unless ( defined $oref ) { 181 $oref = 182 ref $iref eq 'ARRAY' ? [] 183 : ref $iref eq 'HASH' ? {} 184 : \$tmp; 185 } 186 187 # Make sure $oref is empty 188 if ( ref $oref eq 'SCALAR' ) { 189 $$oref = undef; 190 } elsif ( ref $oref eq 'ARRAY' ) { 191 @$oref = (); 192 } else { 193 %$oref = (); 194 } 195 196 # Start working 197 if ( defined $regex ) { 198 if ( ref $iref eq 'SCALAR' ) { 199 pdebug( 'evaluating (%s)', PDLEVEL2, $$iref ); 200 ($$oref) = ( $$iref =~ /^($regex)$/s ) 201 if defined $$iref; 202 $rv = defined $$oref; 203 } elsif ( ref $iref eq 'ARRAY' ) { 204 if ( scalar @$iref ) { 205 $rv = 1; 206 foreach (@$iref) { 207 pdebug( 'evaluating (%s)', PDLEVEL2, $_ ); 208 ( $$oref[ $#{$oref} + 1 ] ) = 209 defined $_ ? m/^($regex)$/s : (undef); 210 $rv = 0 unless defined $$oref[-1]; 211 pdebug( 'got (%s)', PDLEVEL2, $$oref[-1] ); 212 } 213 } 214 $rv = !scalar grep { !defined } @$oref; 215 } else { 216 if ( scalar keys %$iref ) { 217 $rv = 1; 218 foreach ( keys %$iref ) { 219 pdebug( 'evaluating (%s)', PDLEVEL2, $$iref{$_} ); 220 ( $$oref{$_} ) = 221 defined $$iref{$_} 222 ? ( $$iref{$_} =~ m/^($regex)$/s ) 223 : undef; 224 $rv = 0 unless defined $$oref{$_}; 225 } 226 } 227 } 228 } 229 230 # Copy everything back to $iref if needed 231 unless ($po) { 232 if ( ref $iref eq 'SCALAR' ) { 233 $$iref = $$oref; 234 } elsif ( ref $iref eq 'ARRAY' ) { 235 @$iref = @$oref; 236 } else { 237 %$iref = %$oref; 238 } 239 } 240 241 pOut(); 242 pdebug( 'leaving w/rv: %s', PDLEVEL1, $rv ); 243 244 return $rv; 245} 246 247sub stringMatch ($@) { 248 249 # Purpose: Looks for occurrences of strings and/or regexes in the passed 250 # input 251 # Returns: True (1) any of the strings/regexes match, 252 # False (0), otherwise 253 # Usage: $rv = stringMatch($input, @words); 254 255 my $input = shift; 256 my @match = splice @_; 257 my $rv = 0; 258 my @regex; 259 260 pdebug( 'entering w/(%s)(%s)', PDLEVEL1, $input, @match ); 261 pIn(); 262 263 if ( defined $input and @match ) { 264 265 # Populate @regex w/regexes 266 @regex = grep { defined $_ && ref $_ eq 'Regexp' } @match; 267 268 # Convert remaining strings to regexes 269 foreach ( grep { defined $_ && ref $_ ne 'Regexp' } @match ) { 270 push @regex, m#^/(.+)/$#s ? qr#$1#si : qr#\Q$_\E#si; 271 } 272 273 # Start comparisons 274 study $input; 275 foreach my $r (@regex) { 276 if ( $input =~ /$r/si ) { 277 $rv = 1; 278 last; 279 } 280 } 281 } 282 283 pOut(); 284 pdebug( 'leaving w/rv: %s', PDLEVEL1, $rv ); 285 286 return $rv; 287} 288 2891; 290 291__END__ 292 293=head1 NAME 294 295Paranoid::Input - Paranoid input functions 296 297=head1 VERSION 298 299$Id: lib/Paranoid/Input.pm, 2.08 2020/12/31 12:10:06 acorliss Exp $ 300 301=head1 SYNOPSIS 302 303 use Paranoid::Input; 304 305 $rv = detaint($userInput, "login", $detainted); 306 $rv = detaint(@userInput, "login", @detainted); 307 $rv = detaint(%userInput, "login", %detainted); 308 309 $rv = detaint($input, qr#\w+\s+\d+#s); 310 $rv = detaint(@input, qr#\w+\s+\d+#s); 311 $rv = detaint(%input, qr#\w+\s+\d+#s); 312 313 $rv = stringMatch($input, @strings); 314 315 $Paranoid::Input::regexes{'new_type"} = qr/\w\s+\d+/s; 316 317 $rv = pchomp($lines); 318 $rv = pchomp(@lines); 319 $rv = pchomp(%dict); 320 321 # Chomp $_ 322 $rv = pchomp(); 323 324=head1 DESCRIPTION 325 326This provides some generic functions for working with text-based input. The 327main benefirst of this module is a relatively simple way of validating and 328detainting formatted text and performing platform-agnostic chomps. 329 330=head1 IMPORT LISTS 331 332This module exports the following symbols by default: 333 334 detaint stringMatch pchomp 335 336The following specialized import lists also exist: 337 338 List Members 339 -------------------------------------------------------- 340 all @defaults NEWLINE_REGEX 341 342=head1 VARIABLES 343 344=head2 NEWLINE_REGEX 345 346This returns regular expression that matches against DOS, UNIX, and legacy Mac 347line terminators. This is the regular expression used internally by L<pchomp> 348to perform platform-agnostic chomps. 349 350This is only exported if explicity requested, or under an import target of 351B<:all>. 352 353=head1 SUBROUTINES/METHODS 354 355=head2 detaint 356 357 $rv = detaint($userInput, "login", $val); 358 359This function populates the passed data object with the detainted input from the 360first argument. The second argument specifies the type of data in the first 361argument, and is used to validate the input before detainting. If you don't 362want to use one of the built-in regular expressions you can, instead, pass 363your own custom regular expression. 364 365The third argument is optional, but if used, must match the first argument's 366data type. If it is omitted all detainted values are used to overwrite the 367contents of the first argument. If detaint fails for any reason B<undef> is 368used instead. 369 370If the first argument fails to match against these regular expressions the 371function will return 0. If the string passed is either undefined or a 372zero-length string it will also return 0. And finally, if you attempt to use 373an unknown (or unregistered) data type it will also return 0, and log an error 374message in B<Paranoid::ERROR>. 375 376The following regular expressions are known by name: 377 378 Name Description 379 ========================================================= 380 alphabetic Alphabetic characters 381 alphanumeric Alphabetic/numeric characters 382 alphawhite Alphabetic/whitespace characters 383 alnumwhite Alphabetic/numeric/whitespace characters 384 email RFC 822 Email address format 385 filename Essentially no-metacharacters 386 fileglob Same as filename, but with glob meta- 387 character support 388 hostname Alphanumeric/hyphenated host names 389 ipv4addr IPv4 address 390 ipv4netaddr IPv4 network address (CIDR/dotted quad) 391 ipv6addr IPv6 address 392 ipv6netaddr IPv6 network address (CIDR) 393 login UNIX login format 394 nometa Everything but meta-characters 395 number Integer/float/signed/unsigned 396 int Integer/signed/unsigned 397 uint Integer/unsigned 398 float Float/signed/unsigned 399 ufloat Float/unsigned 400 bin binary 401 octal octal 402 hex hexadecimal 403 404=head2 stringMatch 405 406 $rv = stringMatch($input, @strings); 407 408This function does a multiline case insensitive regex match against the 409input for every string passed for matching. This does safe quoted matches 410(\Q$string\E) for all the strings, unless the string is a perl Regexp 411(defined with qr//) or begins and ends with /. 412 413B<NOTE>: this performs a study in hopes that for a large number of regexes 414will be performed faster. This may not always be the case. 415 416=head2 pchomp 417 418 $rv = pchomp(@lines); 419 420B<pchomp> is meant to be a drop-in replacement for chomp, primarily where you 421want it to work as a platform-agnostic line chomper. If I<$/> is altered in 422any manner (slurp mode, fixed record length, etc.) it will assume that's not 423important and automatically call B<chomp> instead. It should, then, be safe 424to be called in all instances in which you'd call B<chomp> itself. 425 426In a nutshell, this function attempts to avoid the assumption that B<chomp> 427makes in that the latter assumes that all input it works upon was authored on 428the same system, using the same input record separators. Using B<pchomp> in 429lieu of B<chomp> will allow you to treat DOS, UNIX, and Mac-authored files 430identically with no additional coding. 431 432Because it is assumed that B<pchomp> will be used in potentially high 433frequency scenarios no B<pdebug> calls are made within it to avoid exercising 434the stack any more than necessary. It is hoped that the relative simplicity 435of the subroutine should make debug use unnecessary. 436 437=head1 DEPENDENCIES 438 439=over 440 441=item o 442 443L<Carp> 444 445=item o 446 447L<Paranoid> 448 449=item o 450 451L<Paranoid::Debug> 452 453=back 454 455=head1 BUGS AND LIMITATIONS 456 457=head1 AUTHOR 458 459Arthur Corliss (corliss@digitalmages.com) 460 461=head1 LICENSE AND COPYRIGHT 462 463This software is free software. Similar to Perl, you can redistribute it 464and/or modify it under the terms of either: 465 466 a) the GNU General Public License 467 <https://www.gnu.org/licenses/gpl-1.0.html> as published by the 468 Free Software Foundation <http://www.fsf.org/>; either version 1 469 <https://www.gnu.org/licenses/gpl-1.0.html>, or any later version 470 <https://www.gnu.org/licenses/license-list.html#GNUGPL>, or 471 b) the Artistic License 2.0 472 <https://opensource.org/licenses/Artistic-2.0>, 473 474subject to the following additional term: No trademark rights to 475"Paranoid" have been or are conveyed under any of the above licenses. 476However, "Paranoid" may be used fairly to describe this unmodified 477software, in good faith, but not as a trademark. 478 479(c) 2005 - 2020, Arthur Corliss (corliss@digitalmages.com) 480(tm) 2008 - 2020, Paranoid Inc. (www.paranoid.com) 481 482