1package Crypt::PassGen; 2 3=head1 NAME 4 5Crypt::PassGen - Generate a random password that looks like a real word 6 7=head1 SYNOPSIS 8 9 use Crypt::PassGen qw/ passgen /; 10 11 @passwords = passgen( NWORDS => 10, NLETT => 8 ); 12 13=head1 DESCRIPTION 14 15This module provides a single command for generating random password 16that is close enough to a real word that it is easy to remember. 17It does this by using the frequency of letter combinations in 18a language (the frequency table is generated during installation 19although multiple tables can be generated and used for different 20languages). The frequency table contains the probability that 21a word will start with a specific letter or 2 letter combination 22and then the frequency of 3 letter combinations. 23 24This module should not be used for high security applications 25(such as user accounts) since it returns passwords that are not 26mixed case, have no punctuation and no letters. This word can be 27used as a basis for a more secure password. 28 29The language of the password depends on the language used to construct 30the frequency table. 31 32=cut 33 34use integer; 35use strict; 36use Storable qw/ nstore retrieve /; 37use File::Spec; 38use Config; 39use vars qw/ $VERSION @ISA @EXPORT_OK $DEFAULT_DICT $DEFAULT_FREQFILE 40 $ERRSTR 41 /; 42 43$VERSION = '0.06'; 44 45use constant MAXN => 32000; 46require Exporter; 47@ISA = qw( Exporter ); 48@EXPORT_OK = qw( passgen ingest ); 49 50# Default input dictionary and frequency file 51# The frequency file should be stored in the same place as this 52# module [use Config] 53 54$DEFAULT_DICT = '/usr/dict/words'; # Unix specific 55 56$DEFAULT_FREQFILE = File::Spec->catfile($Config{installsitelib}, 57 "Crypt", 58 "PassGenWordFreq.dat"); 59 60# This is the cache of frequency data to prevent us going to 61# disk each time passgen() is called. This effectively means 62# that calling passgen() 100 times is almost as fast as calling 63# passgen once for 100 passwords. 64 65my %CACHE; 66 67# Set up a hash with a lookup table to translate a character to 68# a position in an array 69 70my %letters = ( 71 A => 0, 72 B => 1, 73 C => 2, 74 D => 3, 75 E => 4, 76 F => 5, 77 G => 6, 78 H => 7, 79 I => 8, 80 J => 9, 81 K => 10, 82 L => 11, 83 M => 12, 84 N => 13, 85 O => 14, 86 P => 15, 87 Q => 16, 88 R => 17, 89 S => 18, 90 T => 19, 91 U => 20, 92 V => 21, 93 W => 22, 94 X => 23, 95 Y => 24, 96 Z => 25 97); 98 99# ...and generate the inverse lookup table so that we can go from 100# a position to a letter 101 102my %revlett; 103foreach (keys %letters) { 104 $revlett{ $letters{$_} } = $_; 105} 106 107=head1 FUNCTIONS 108 109The following functions are provided: 110 111=over 4 112 113=item B<ingest> 114 115This function is used to create a frequency table to be used later 116by C<passgen>. This routine is run during the initial install of the 117module so that at least one frequency table is available. 118 119This function reads a file and for each word that is found (ignoring 120any with non-alphabet characters) notes the starting letter, the 121second letter and each combination of 3 letters. Once the file is read 122the resultant arrays then contain the relative occurence of each letter 123combination. The frequency table will vary depending on the language 124of the input file. 125 126 ingest( DICT => '/usr/dict/words', 127 FILE => 'wordfreq.dat', 128 APPEND => 0) 129 130The input hash can contain keys C<DICT>, C<FILE> and C<APPEND> 131with the above defaults. All arguments are optional. If C<APPEND> 132is true the frequency table from the input dictionary will be 133appended to an existing table (if it exists). 134 135Returns 1 if successful and 0 otherwise. On error, the reason 136is stored in $Crypt::PassGen::ERRSTR. 137 138A default frequency file is provided for C<passgen> as part of the 139installation. This routine is only required to either extend or 140replace the default value. 141 142=cut 143 144sub ingest { 145 146 my %defaults = ( 147 DICT => $DEFAULT_DICT, 148 FILE => $DEFAULT_FREQFILE, 149 APPEND => 0, 150 ); 151 152 my %opts = ( %defaults, @_ ); 153 154 # This becomes our pseudo-object 155 my $data; 156 157 # If we are appending to previous data we simply need 158 # to read that in to initialise the arrays 159 if ($opts{APPEND} && -e $opts{FILE}) { 160 161 $data = _readdata( $opts{FILE} ); 162 163 } else { 164 # Initialise these arrays with zeroes to be -w clean 165 # Calculate the size 166 my $nkeys = (scalar keys %letters) - 1; 167 168 # Create the data structure with 3 arrays 169 $data = { 170 FIRST => [], # Occurence of a starting letter 171 SECOND => [],# Occurences of the first 2 letter combos 172 THIRD => [], # Occurences of 3 letter combinations 173 }; 174 175 # Not appending, so we need to presize and fill with zeroes 176 # presize then initialise with map 177 $data->{FIRST}[$nkeys] = 0; 178 @{ $data->{FIRST} } = map { 0 } @{ $data->{FIRST} }; 179 for my $i (0..$nkeys) { 180 $data->{SECOND}[$i][$nkeys] = 0; 181 @{ $data->{SECOND}[$i] } = map { 0 } @{ $data->{SECOND}[$i] }; 182 183 for my $j ( 0..$nkeys ) { 184 $data->{THIRD}[$i][$j][$nkeys] = 0; 185 @{ $data->{THIRD}[$i][$j] } = map { 0 } @{ $data->{THIRD}[$i][$j] } 186 } 187 } 188 } 189 190 # Open the dictionary file 191 open( LISTOWORDS, $opts{DICT}) or 192 do { $ERRSTR = "Could not open dictionary file $opts{DICT}: $!"; return 0}; 193 194 # Now read a line at a time from the file 195 while (<LISTOWORDS>){ 196 my @words = split ; 197 for my $word (@words){ 198 next if $word !~ /^[a-z]+$/i || length($word) < 3; 199 $word = uc($word); 200 # Split the word into letters 201 my @temlets = split //,$word; 202 203 # increment the freq. of the first two letters of the word 204 $data->{FIRST}[ $letters{$temlets[0]} ]++; 205 206 # Divide everything by 2 if we are becoming too large 207 _scale_first_down( $data->{FIRST} ) 208 if $data->{FIRST}[$letters{$temlets[0]} ] > MAXN; 209 210 $data->{SECOND}[ $letters{$temlets[0]} ][ $letters{$temlets[1]} ]++; 211 212 # Divide everything by 2 if we are becoming too large 213 _scale_seconds_down( $data->{SECOND} ) 214 if $data->{SECOND}[$letters{$temlets[0]}][$letters{$temlets[1]}] > MAXN; 215 216 # look at letter freq for rest of the word 217 for my $j (2 .. $#temlets){ 218 $data->{THIRD}[$letters{$temlets[$j-2]}][$letters{$temlets[$j-1]}][$letters{$temlets[$j]}]++; 219 220 # Divide everything by 2 if we are becoming too large 221 _scale_thirds_down( $data->{THIRD} ) 222 if $data->{THIRD}[$letters{$temlets[$j-2]}][$letters{$temlets[$j-1]}][$letters{$temlets[$j]}] > MAXN; 223 } 224 } 225 } 226 227 # Close dictionary file 228 close( LISTOWORDS ) or 229 do { 230 $ERRSTR = "Could not close dictionary file $opts{DICT}: $!"; 231 return 0; 232 }; 233 234 # Precalculate the totals - this is a trade off of 235 # disk space versus speed and disk space is cheap (and this 236 # is not a very large array anyway 237 _calctotals( $data ); 238 239 # Now store the data in the output file 240 _storedata( $data, , $opts{FILE} ) or 241 do { 242 $ERRSTR = "Error storing data to $opts{FILE}"; 243 return; 244 }; 245 246 return 1; 247} 248 249=item B<passgen> 250 251Generate a password. 252 253 @words = passgen( %options ); 254 255Argument is a hash with the following keys: 256 257 FILE The filename containing the frequency information. Must 258 have been written using C<ingest>. 259 NLETT Number of letters to use for the generated password. 260 Must be at least 5 261 NWORDS Number of passwords to generate 262 263An array of passwords is returned. An empty list is returned 264if an error occurs (and $Crypt::PassGen::ERRSTR is set to 265the reason). 266 267=cut 268 269sub passgen { 270 271 my %defaults = ( 272 FILE => $DEFAULT_FREQFILE, 273 NLETT => 8, 274 NWORDS => 1, 275 ); 276 277 my %opts = (%defaults, @_); 278 279 # Return if NLETT is too short 280 if ($opts{NLETT} < 5) { 281 $ERRSTR = 'A password must be at least 5 letters'; 282 return (); 283 } 284 285 # Read in the data 286 my $data = _readdata( $opts{FILE} ); 287 288 # Calculate the minimum score 289 my $minscore = _calcminscore( $data, $opts{NLETT} ); 290 291 # Generate the required number of passwords 292 my @WORDS; 293 for my $n ( 1..$opts{NWORDS} ) { 294 295 push(@WORDS, _generate( $data, $opts{NLETT}, $minscore )) ; 296 297 } 298 299 return @WORDS; 300} 301 302 303# internal routines 304 305# Generate a password 306# Arguments: data 'object', number of letters, minimum score 307# returns a lower-cased password 308 309sub _generate ($$) { 310 my ($data, $nlett, $minscore) = @_; 311 312 # Need to loop round until we reach minimum score 313 my $score = 0; 314 my $n = 0; 315 my $word; 316 317 WORDLOOP: while ($score < $minscore || length($word) < $nlett) { 318 319 # reset current score 320 $score = 0; 321 322 # Keep track of the number of times around 323 $n++; 324 if ($n > 100) { 325 $n = 0; 326 $minscore *= 0.75; 327 } 328 329 # These calculations could all be prettified off into a sub 330 # Now pick letters at random (starting with the first) 331 my $ind = _tot_to_index(int(rand( $data->{FIRST_TOT} )), $data->{FIRST} ); 332 next WORDLOOP if $ind < 0; 333 $word = $revlett{ $ind }; 334 $score= $data->{FIRST}[ $ind ]; 335 my $prev1 = $ind; 336 337 # Now the second letter 338 $ind = _tot_to_index( int(rand( $data->{SECOND_TOT}[$prev1] )), 339 $data->{SECOND}[ $prev1 ]); 340 next WORDLOOP if $ind < 0; 341 my $prev2 = $ind; 342 $score += $data->{SECOND}[ $prev1 ][ $prev2 ]; 343 $word .= $revlett{ $ind }; 344 345 # Loop until we get the required number of letters 346 for my $i ( 3.. $nlett ) { 347 $ind = _tot_to_index( int(rand( $data->{PAIR_TOT}[$prev1][$prev2] )), 348 $data->{THIRD}[$prev1][$prev2] ); 349 next WORDLOOP if $ind < 0; 350 $score += $data->{THIRD}[$prev1][$prev2][$ind]; 351 $word .= $revlett{ $ind }; 352 $prev1 = $prev2; # store the previous two letters 353 $prev2 = $ind; 354 } 355 356 } 357 358 return lc($word); 359 360} 361 362# store the frequency data to disk 363# Arguments: 364# Data to store ( the 'object' ) 365# Output filename 366# Append or not 367 368# Returns: 1 (good), 0 (bad) 369 370sub _storedata ($$) { 371 my ($data, $file ) = @_; 372 373 # Now simply write the data in network order 374 nstore( $data, $file ); 375} 376 377# Read the data 378# Arguments: filename 379# Returns : the data (undef on error) 380# The data is a hash with keys FIRST, SECOND, THIRD 381# The data is cached to prevent reading the frequency 382# table from disk each time -- 99.9% of the time we will 383# be reading from the same file and the memory overhead 384# of keeping the cache open is insignificant 385# The cache is keyed by the filename but can not tell that 386# file /a/b/c is the same as 'b/c'. 387 388# This is a 'constructor' 389 390sub _readdata { 391 my $file = shift; 392 if ( exists $CACHE{ $file } ) { 393 return $CACHE{ $file }; 394 } else { 395 my $data = retrieve( $file ); 396 $CACHE{ $file } = $data; 397 return $data; 398 } 399} 400 401# Divide everything in a 1-D array by 2 402 403sub _scale_first_down { 404 use integer; 405 my $arr = shift; 406 for ( @$arr ) { 407 $_ /= 2; 408 } 409} 410 411# Divide everything in a 2-D array by 2 412 413sub _scale_seconds_down { 414 use integer; 415 my $arr = shift; 416 for my $i (@$arr) { 417 for my $j (@$i) { 418 $j /= 2; 419 } 420 } 421} 422 423# Divide everything in a 3-D array by 2.0 424 425sub _scale_thirds_down { 426 use integer; 427 my $arr = shift; 428 for my $i (@$arr) { 429 for my $j (@$i) { 430 for my $k (@$j) { 431 $k /= 2; 432 } 433 } 434 } 435} 436 437# Calculate the totals 438# Effectively calculates the total weight for each letter combination 439# Argument: hash reference containing FIRST, SECOND and THIRD 440# 441# Adds the following keys FIRST_TOT (scalar), SECOND_TOT, PAIR_TOT 442# (array refs) which are the totals for each letter combination. 443# and AVFIRST, AVSECOND and AVHTHIRD (the average occurence related 444 445sub _calctotals { 446 my $data = shift; 447 448 my ( $nfirst, $nsec, $nthird ); 449 my ( $second_fullsum, $third_fullsum ); 450 451 # Get the size (yes I know it is 26-1) 452 my $size = $#{ $data->{FIRST} }; 453 454 # Init 455 $data->{FIRST_TOT} = 0; 456 $data->{SECOND_TOT} = []; 457 @{ $data->{SECOND_TOT} } = map { 0 } (0..$size); 458 for my $i ( 0.. $size ) { 459 $data->{PAIR_TOT}[$i] = []; 460 @{ $data->{PAIR_TOT}[$i] } = map { 0 } (0..$size); 461 } 462 463 # Loop over all members summing up 464 for my $i ( 0 .. $size ) { 465 $data->{FIRST_TOT} += $data->{FIRST}[$i]; 466 $nfirst++ if $data->{FIRST}[$i]; 467 for my $j ( 0 .. $size ) { 468 $data->{SECOND_TOT}[$i] += $data->{SECOND}[$i][$j]; 469 $nsec++ if $data->{SECOND}[$i][$j]; 470 $second_fullsum += $data->{SECOND}[$i][$j]; 471 for my $k ( 0 .. $size ) { 472 $data->{PAIR_TOT}[$i][$j] += $data->{THIRD}[$i][$j][$k]; 473 $nthird++ if $data->{THIRD}[$i][$j][$k]; 474 $third_fullsum += $data->{THIRD}[$i][$j][$k]; 475 } 476 } 477 } 478 479 # Calculate the average none zero occurence 480 $data->{AVFIRST} = $data->{FIRST_TOT} / $nfirst; 481 $data->{AVSECOND} = $second_fullsum / $nsec; 482 $data->{AVTHIRD} = $third_fullsum / $nthird; 483 484} 485 486# Calculate the minimum score. When each letter is selected 487# its occurence value is added to a score. The minimum score 488# criterion decides whether the generated password is good enough 489# or needs to be regenerated. It is simply the sum of the 490# average occurences for each letter multiplied by 3. 491 492# Arguments: Data hash, length of required password 493 494sub _calcminscore { 495 my $data = shift; 496 my $length = shift; 497 498 my $score = 3 * ( $data->{AVFIRST} + $data->{AVSECOND} + 499 ( ( $length - 2 ) * $data->{AVTHIRD} ) ); 500 501 return $score; 502} 503 504# Translate a position in the TOT array to an index in the corresponding 505# array 506 507# Arguments: Total, array ref to be searched 508# Returns: pos 509# -1 if no index could be determined 510 511sub _tot_to_index { 512 my ($tot, $arr) = @_; 513 my $i=0; 514 while ($tot >= 0 && $i <= $#$arr) { 515 $tot -= $arr->[ $i ]; 516# print "Tot now: $tot\t $i ",$revlett{$i}," ",$arr->[$i],"\n"; 517 $i++; 518 } 519 520 # if we are still >= 0 we could not match an index 521 return -1 if $tot >= 0; 522 523 # Found a valid index 524 return --$i; 525} 526 527 528=back 529 530=head1 ERROR HANDLING 531 532All routines in this module store errors in the ERRSTR 533variable. This variable can be accessed if the routines 534return an error state and contains the reason for the error. 535 536 @words = passgen( NLETT => 2 ) 537 or die "Error message: $Crypt::PassGen::ERRSTR"; 538 539=head1 AUTHORS 540 541Tim Jenness E<lt>tjenness@cpan.orgE<gt> Copyright (C) 5422000-2012 T. Jenness. All Rights Reserved. This program is free 543software; you can redistribute it and/or modify it under the same 544terms as Perl itself. 545 546Based on the PASSGEN program written by Mike Bartman of SAR, Inc as 547part of the SPAN security toolkit. 548 549=cut 550 551 5521; 553