1# -*- mode: perl; perl-indent-level: 2; -*- 2# vim: ts=8 sw=2 sts=2 noexpandtab 3 4# Memoize.pm 5# 6# Copyright 1998, 1999, 2000, 2001, 2012 M. J. Dominus. 7# You may copy and distribute this program under the 8# same terms as Perl itself. 9 10use strict; use warnings; 11 12package Memoize; 13our $VERSION = '1.16'; 14 15use Carp; 16use Scalar::Util 1.11 (); # for set_prototype 17 18BEGIN { require Exporter; *import = \&Exporter::import } 19our @EXPORT = qw(memoize); 20our @EXPORT_OK = qw(unmemoize flush_cache); 21 22my %memotable; 23 24sub CLONE { 25 my @info = values %memotable; 26 %memotable = map +($_->{WRAPPER} => $_), @info; 27} 28 29sub memoize { 30 my $fn = shift; 31 my %options = @_; 32 33 unless (defined($fn) && 34 (ref $fn eq 'CODE' || ref $fn eq '')) { 35 croak "Usage: memoize 'functionname'|coderef {OPTIONS}"; 36 } 37 38 my $uppack = caller; # TCL me Elmo! 39 my $name = (ref $fn ? undef : $fn); 40 my $cref = _make_cref($fn, $uppack); 41 42 my $normalizer = $options{NORMALIZER}; 43 if (defined $normalizer && ! ref $normalizer) { 44 $normalizer = _make_cref($normalizer, $uppack); 45 } 46 47 my $install_name = exists $options{INSTALL} 48 ? $options{INSTALL} # use given name (or, if undef: do not install) 49 : $name; # no INSTALL option provided: default to original name if possible 50 51 if (defined $install_name) { 52 $install_name = $uppack . '::' . $install_name 53 unless $install_name =~ /::/; 54 } 55 56 # convert LIST_CACHE => MERGE to SCALAR_CACHE => MERGE 57 # to ensure TIE/HASH will always be checked by _check_suitable 58 if (($options{LIST_CACHE} || '') eq 'MERGE') { 59 $options{LIST_CACHE} = $options{SCALAR_CACHE}; 60 $options{SCALAR_CACHE} = 'MERGE'; 61 } 62 63 # These will be the caches 64 my %caches; 65 for my $context (qw(LIST SCALAR)) { # SCALAR_CACHE must be last, to process MERGE 66 my $fullopt = $options{"${context}_CACHE"} ||= 'MEMORY'; 67 my ($cache_opt, @cache_opt_args) = ref $fullopt ? @$fullopt : $fullopt; 68 if ($cache_opt eq 'FAULT') { # no cache 69 $caches{$context} = undef; 70 } elsif ($cache_opt eq 'HASH') { # user-supplied hash 71 my $cache = $cache_opt_args[0]; 72 _check_suitable($context, ref tied %$cache); 73 $caches{$context} = $cache; 74 } elsif ($cache_opt eq 'TIE') { 75 carp("TIE option to memoize() is deprecated; use HASH instead") 76 if warnings::enabled('all'); 77 my $module = shift(@cache_opt_args) || ''; 78 _check_suitable($context, $module); 79 my $hash = $caches{$context} = {}; 80 (my $modulefile = $module . '.pm') =~ s{::}{/}g; 81 require $modulefile; 82 tie(%$hash, $module, @cache_opt_args) 83 or croak "Couldn't tie memoize hash to `$module': $!"; 84 } elsif ($cache_opt eq 'MEMORY') { 85 $caches{$context} = {}; 86 } elsif ($cache_opt eq 'MERGE' and not ref $fullopt) { # ['MERGE'] was never supported 87 die "cannot MERGE $context\_CACHE" if $context ne 'SCALAR'; # should never happen 88 die 'bad cache setup order' if not exists $caches{LIST}; # should never happen 89 $options{MERGED} = 1; 90 $caches{SCALAR} = $caches{LIST}; 91 } else { 92 croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (MERGE TIE MEMORY FAULT HASH)"; 93 } 94 } 95 96 my $wrapper = _wrap($install_name, $cref, $normalizer, $options{MERGED}, \%caches); 97 98 if (defined $install_name) { 99 no strict; 100 no warnings 'redefine'; 101 *{$install_name} = $wrapper; 102 } 103 104 $memotable{$wrapper} = { 105 L => $caches{LIST}, 106 S => $caches{SCALAR}, 107 U => $cref, 108 NAME => $install_name, 109 WRAPPER => $wrapper, 110 }; 111 112 $wrapper # Return just memoized version 113} 114 115sub flush_cache { 116 my $func = _make_cref($_[0], scalar caller); 117 my $info = $memotable{$func}; 118 die "$func not memoized" unless defined $info; 119 for my $context (qw(S L)) { 120 my $cache = $info->{$context}; 121 if (tied %$cache && ! (tied %$cache)->can('CLEAR')) { 122 my $funcname = defined($info->{NAME}) ? 123 "function $info->{NAME}" : "anonymous function $func"; 124 my $context = {S => 'scalar', L => 'list'}->{$context}; 125 croak "Tied cache hash for $context-context $funcname does not support flushing"; 126 } else { 127 %$cache = (); 128 } 129 } 130} 131 132sub _wrap { 133 my ($name, $orig, $normalizer, $merged, $caches) = @_; 134 my ($cache_L, $cache_S) = @$caches{qw(LIST SCALAR)}; 135 undef $caches; # keep the pad from keeping the hash alive forever 136 Scalar::Util::set_prototype(sub { 137 my $argstr = do { 138 no warnings 'uninitialized'; 139 defined $normalizer 140 ? ( wantarray ? ( $normalizer->( @_ ) )[0] : $normalizer->( @_ ) ) 141 . '' # coerce undef to string while the warning is off 142 : join chr(28), @_; 143 }; 144 145 if (wantarray) { 146 _crap_out($name, 'list') unless $cache_L; 147 exists $cache_L->{$argstr} ? ( 148 @{$cache_L->{$argstr}} 149 ) : do { 150 my @q = do { no warnings 'recursion'; &$orig }; 151 $cache_L->{$argstr} = \@q; 152 @q; 153 }; 154 } else { 155 _crap_out($name, 'scalar') unless $cache_S; 156 exists $cache_S->{$argstr} ? ( 157 $merged ? $cache_S->{$argstr}[0] : $cache_S->{$argstr} 158 ) : do { 159 my $val = do { no warnings 'recursion'; &$orig }; 160 $cache_S->{$argstr} = $merged ? [$val] : $val; 161 $val; 162 }; 163 } 164 }, prototype $orig); 165} 166 167sub unmemoize { 168 my $f = shift; 169 my $uppack = caller; 170 my $cref = _make_cref($f, $uppack); 171 172 unless (exists $memotable{$cref}) { 173 croak "Could not unmemoize function `$f', because it was not memoized to begin with"; 174 } 175 176 my $tabent = $memotable{$cref}; 177 unless (defined $tabent) { 178 croak "Could not figure out how to unmemoize function `$f'"; 179 } 180 my $name = $tabent->{NAME}; 181 if (defined $name) { 182 no strict; 183 no warnings 'redefine'; 184 *{$name} = $tabent->{U}; # Replace with original function 185 } 186 delete $memotable{$cref}; 187 188 $tabent->{U}; 189} 190 191sub _make_cref { 192 my $fn = shift; 193 my $uppack = shift; 194 my $cref; 195 my $name; 196 197 if (ref $fn eq 'CODE') { 198 $cref = $fn; 199 } elsif (! ref $fn) { 200 if ($fn =~ /::/) { 201 $name = $fn; 202 } else { 203 $name = $uppack . '::' . $fn; 204 } 205 no strict; 206 if (defined $name and !defined(&$name)) { 207 croak "Cannot operate on nonexistent function `$fn'"; 208 } 209# $cref = \&$name; 210 $cref = *{$name}{CODE}; 211 } else { 212 my $parent = (caller(1))[3]; # Function that called _make_cref 213 croak "Usage: argument 1 to `$parent' must be a function name or reference.\n"; 214 } 215 our $DEBUG and warn "${name}($fn) => $cref in _make_cref\n"; 216 $cref; 217} 218 219sub _crap_out { 220 my ($funcname, $context) = @_; 221 if (defined $funcname) { 222 croak "Function `$funcname' called in forbidden $context context; faulting"; 223 } else { 224 croak "Anonymous function called in forbidden $context context; faulting"; 225 } 226} 227 228# Raise an error if the user tries to specify one of these packages as a 229# tie for LIST_CACHE 230my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File), map +($_, "Memoize::$_"), qw(AnyDBM_File NDBM_File); 231sub _check_suitable { 232 my ($context, $package) = @_; 233 croak "You can't use $package for LIST_CACHE because it can only store scalars" 234 if $context eq 'LIST' and $scalar_only{$package}; 235} 236 2371; 238 239__END__ 240 241=pod 242 243=head1 NAME 244 245Memoize - Make functions faster by trading space for time 246 247=head1 SYNOPSIS 248 249 use Memoize; 250 memoize('slow_function'); 251 slow_function(arguments); # Is faster than it was before 252 253 254This is normally all you need to know. However, many options are available: 255 256 memoize(function, options...); 257 258Options include: 259 260 NORMALIZER => function 261 INSTALL => new_name 262 263 SCALAR_CACHE => 'MEMORY' 264 SCALAR_CACHE => ['HASH', \%cache_hash ] 265 SCALAR_CACHE => 'FAULT' 266 SCALAR_CACHE => 'MERGE' 267 268 LIST_CACHE => 'MEMORY' 269 LIST_CACHE => ['HASH', \%cache_hash ] 270 LIST_CACHE => 'FAULT' 271 LIST_CACHE => 'MERGE' 272 273=head1 DESCRIPTION 274 275I<Memoizing> a function makes it faster by trading space for time. It 276does this by caching the return values of the function in a table. 277If you call the function again with the same arguments, C<memoize> 278jumps in and gives you the value out of the table, instead of letting 279the function compute the value all over again. 280 281=head1 EXAMPLE 282 283Here is an extreme example. Consider the Fibonacci sequence, defined 284by the following function: 285 286 # Compute Fibonacci numbers 287 sub fib { 288 my $n = shift; 289 return $n if $n < 2; 290 fib($n-1) + fib($n-2); 291 } 292 293This function is very slow. Why? To compute fib(14), it first wants 294to compute fib(13) and fib(12), and add the results. But to compute 295fib(13), it first has to compute fib(12) and fib(11), and then it 296comes back and computes fib(12) all over again even though the answer 297is the same. And both of the times that it wants to compute fib(12), 298it has to compute fib(11) from scratch, and then it has to do it 299again each time it wants to compute fib(13). This function does so 300much recomputing of old results that it takes a really long time to 301run---fib(14) makes 1,200 extra recursive calls to itself, to compute 302and recompute things that it already computed. 303 304This function is a good candidate for memoization. If you memoize the 305C<fib> function above, it will compute fib(14) exactly once, the first 306time it needs to, and then save the result in a table. Then if you 307ask for fib(14) again, it gives you the result out of the table. 308While computing fib(14), instead of computing fib(12) twice, it does 309it once; the second time it needs the value it gets it from the table. 310It doesn't compute fib(11) four times; it computes it once, getting it 311from the table the next three times. Instead of making 1,200 312recursive calls to C<fib>, it makes 15. This makes the function about 313150 times faster. 314 315You could do the memoization yourself, by rewriting the function, like 316this: 317 318 # Compute Fibonacci numbers, memoized version 319 { my @fib; 320 sub fib { 321 my $n = shift; 322 return $fib[$n] if defined $fib[$n]; 323 return $fib[$n] = $n if $n < 2; 324 $fib[$n] = fib($n-1) + fib($n-2); 325 } 326 } 327 328Or you could use this module, like this: 329 330 use Memoize; 331 memoize('fib'); 332 333 # Rest of the fib function just like the original version. 334 335This makes it easy to turn memoizing on and off. 336 337Here's an even simpler example: I wrote a simple ray tracer; the 338program would look in a certain direction, figure out what it was 339looking at, and then convert the C<color> value (typically a string 340like C<red>) of that object to a red, green, and blue pixel value, like 341this: 342 343 for ($direction = 0; $direction < 300; $direction++) { 344 # Figure out which object is in direction $direction 345 $color = $object->{color}; 346 ($r, $g, $b) = @{&ColorToRGB($color)}; 347 ... 348 } 349 350Since there are relatively few objects in a picture, there are only a 351few colors, which get looked up over and over again. Memoizing 352C<ColorToRGB> sped up the program by several percent. 353 354=head1 DETAILS 355 356This module exports exactly one function, C<memoize>. The rest of the 357functions in this package are None of Your Business. 358 359You should say 360 361 memoize(function) 362 363where C<function> is the name of the function you want to memoize, or 364a reference to it. C<memoize> returns a reference to the new, 365memoized version of the function, or C<undef> on a non-fatal error. 366At present, there are no non-fatal errors, but there might be some in 367the future. 368 369If C<function> was the name of a function, then C<memoize> hides the 370old version and installs the new memoized version under the old name, 371so that C<&function(...)> actually invokes the memoized version. 372 373=head1 OPTIONS 374 375There are some optional options you can pass to C<memoize> to change 376the way it behaves a little. To supply options, invoke C<memoize> 377like this: 378 379 memoize(function, NORMALIZER => function, 380 INSTALL => newname, 381 SCALAR_CACHE => option, 382 LIST_CACHE => option 383 ); 384 385Each of these options is optional; you can include some, all, or none 386of them. 387 388=head2 INSTALL 389 390If you supply a function name with C<INSTALL>, memoize will install 391the new, memoized version of the function under the name you give. 392For example, 393 394 memoize('fib', INSTALL => 'fastfib') 395 396installs the memoized version of C<fib> as C<fastfib>; without the 397C<INSTALL> option it would have replaced the old C<fib> with the 398memoized version. 399 400To prevent C<memoize> from installing the memoized version anywhere, use 401C<INSTALL =E<gt> undef>. 402 403=head2 NORMALIZER 404 405Suppose your function looks like this: 406 407 # Typical call: f('aha!', A => 11, B => 12); 408 sub f { 409 my $a = shift; 410 my %hash = @_; 411 $hash{B} ||= 2; # B defaults to 2 412 $hash{C} ||= 7; # C defaults to 7 413 414 # Do something with $a, %hash 415 } 416 417Now, the following calls to your function are all completely equivalent: 418 419 f(OUCH); 420 f(OUCH, B => 2); 421 f(OUCH, C => 7); 422 f(OUCH, B => 2, C => 7); 423 f(OUCH, C => 7, B => 2); 424 (etc.) 425 426However, unless you tell C<Memoize> that these calls are equivalent, 427it will not know that, and it will compute the values for these 428invocations of your function separately, and store them separately. 429 430To prevent this, supply a C<NORMALIZER> function that turns the 431program arguments into a string in a way that equivalent arguments 432turn into the same string. A C<NORMALIZER> function for C<f> above 433might look like this: 434 435 sub normalize_f { 436 my $a = shift; 437 my %hash = @_; 438 $hash{B} ||= 2; 439 $hash{C} ||= 7; 440 441 join(',', $a, map ($_ => $hash{$_}) sort keys %hash); 442 } 443 444Each of the argument lists above comes out of the C<normalize_f> 445function looking exactly the same, like this: 446 447 OUCH,B,2,C,7 448 449You would tell C<Memoize> to use this normalizer this way: 450 451 memoize('f', NORMALIZER => 'normalize_f'); 452 453C<memoize> knows that if the normalized version of the arguments is 454the same for two argument lists, then it can safely look up the value 455that it computed for one argument list and return it as the result of 456calling the function with the other argument list, even if the 457argument lists look different. 458 459The default normalizer just concatenates the arguments with character 46028 in between. (In ASCII, this is called FS or control-\.) This 461always works correctly for functions with only one string argument, 462and also when the arguments never contain character 28. However, it 463can confuse certain argument lists: 464 465 normalizer("a\034", "b") 466 normalizer("a", "\034b") 467 normalizer("a\034\034b") 468 469for example. 470 471Since hash keys are strings, the default normalizer will not 472distinguish between C<undef> and the empty string. It also won't work 473when the function's arguments are references. For example, consider a 474function C<g> which gets two arguments: A number, and a reference to 475an array of numbers: 476 477 g(13, [1,2,3,4,5,6,7]); 478 479The default normalizer will turn this into something like 480C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a 481subsequent array of numbers might be stored at a different location 482even though it contains the same data. If this happens, C<Memoize> 483will think that the arguments are different, even though they are 484equivalent. In this case, a normalizer like this is appropriate: 485 486 sub normalize { join ' ', $_[0], @{$_[1]} } 487 488For the example above, this produces the key "13 1 2 3 4 5 6 7". 489 490Another use for normalizers is when the function depends on data other 491than those in its arguments. Suppose you have a function which 492returns a value which depends on the current hour of the day: 493 494 sub on_duty { 495 my ($problem_type) = @_; 496 my $hour = (localtime)[2]; 497 open my $fh, "$DIR/$problem_type" or die...; 498 my $line; 499 while ($hour-- > 0) 500 $line = <$fh>; 501 } 502 return $line; 503 } 504 505At 10:23, this function generates the 10th line of a data file; at 5063:45 PM it generates the 15th line instead. By default, C<Memoize> 507will only see the $problem_type argument. To fix this, include the 508current hour in the normalizer: 509 510 sub normalize { join ' ', (localtime)[2], @_ } 511 512The calling context of the function (scalar or list context) is 513propagated to the normalizer. This means that if the memoized 514function will treat its arguments differently in list context than it 515would in scalar context, you can have the normalizer function select 516its behavior based on the results of C<wantarray>. Even if called in 517a list context, a normalizer should still return a single string. 518 519=head2 C<SCALAR_CACHE>, C<LIST_CACHE> 520 521Normally, C<Memoize> caches your function's return values into an 522ordinary Perl hash variable. However, you might like to have the 523values cached on the disk, so that they persist from one run of your 524program to the next, or you might like to associate some other 525interesting semantics with the cached values. 526 527There's a slight complication under the hood of C<Memoize>: There are 528actually I<two> caches, one for scalar values and one for list values. 529When your function is called in scalar context, its return value is 530cached in one hash, and when your function is called in list context, 531its value is cached in the other hash. You can control the caching 532behavior of both contexts independently with these options. 533 534The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of 535the following four strings: 536 537 MEMORY 538 FAULT 539 MERGE 540 HASH 541 542or else it must be a reference to an array whose first element is one of 543these four strings, such as C<[HASH, arguments...]>. 544 545=over 4 546 547=item C<MEMORY> 548 549C<MEMORY> means that return values from the function will be cached in 550an ordinary Perl hash variable. The hash variable will not persist 551after the program exits. This is the default. 552 553=item C<HASH> 554 555C<HASH> allows you to specify that a particular hash that you supply 556will be used as the cache. You can tie this hash beforehand to give 557it any behavior you want. 558 559A tied hash can have any semantics at all. It is typically tied to an 560on-disk database, so that cached values are stored in the database and 561retrieved from it again when needed, and the disk file typically 562persists after your program has exited. See C<perltie> for more 563complete details about C<tie>. 564 565A typical example is: 566 567 use DB_File; 568 tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666; 569 memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 570 571This has the effect of storing the cache in a C<DB_File> database 572whose name is in C<$filename>. The cache will persist after the 573program has exited. Next time the program runs, it will find the 574cache already populated from the previous run of the program. Or you 575can forcibly populate the cache by constructing a batch program that 576runs in the background and populates the cache file. Then when you 577come to run your real program the memoized function will be fast 578because all its results have been precomputed. 579 580Another reason to use C<HASH> is to provide your own hash variable. 581You can then inspect or modify the contents of the hash to gain finer 582control over the cache management. 583 584=item C<TIE> 585 586This option is no longer supported. It is still documented only to 587aid in the debugging of old programs that use it. Old programs should 588be converted to use the C<HASH> option instead. 589 590 memoize ... ['TIE', PACKAGE, ARGS...] 591 592is merely a shortcut for 593 594 require PACKAGE; 595 { tie my %cache, PACKAGE, ARGS...; 596 memoize ... [HASH => \%cache]; 597 } 598 599=item C<FAULT> 600 601C<FAULT> means that you never expect to call the function in scalar 602(or list) context, and that if C<Memoize> detects such a call, it 603should abort the program. The error message is one of 604 605 `foo' function called in forbidden list context at line ... 606 `foo' function called in forbidden scalar context at line ... 607 608=item C<MERGE> 609 610C<MERGE> normally means that the memoized function does not 611distinguish between list and scalar context, and that return values in 612both contexts should be stored together. Both C<LIST_CACHE =E<gt> 613MERGE> and C<SCALAR_CACHE =E<gt> MERGE> mean the same thing. 614 615Consider this function: 616 617 sub complicated { 618 # ... time-consuming calculation of $result 619 return $result; 620 } 621 622The C<complicated> function will return the same numeric C<$result> 623regardless of whether it is called in list or in scalar context. 624 625Normally, the following code will result in two calls to C<complicated>, even 626if C<complicated> is memoized: 627 628 $x = complicated(142); 629 ($y) = complicated(142); 630 $z = complicated(142); 631 632The first call will cache the result, say 37, in the scalar cache; the 633second will cache the list C<(37)> in the list cache. The third call 634doesn't call the real C<complicated> function; it gets the value 37 635from the scalar cache. 636 637Obviously, the second call to C<complicated> is a waste of time, and 638storing its return value is a waste of space. Specifying C<LIST_CACHE 639=E<gt> MERGE> will make C<memoize> use the same cache for scalar and 640list context return values, so that the second call uses the scalar 641cache that was populated by the first call. C<complicated> ends up 642being called only once, and both subsequent calls return C<37> from the 643cache, regardless of the calling context. 644 645=back 646 647=head3 List values in scalar context 648 649Consider this function: 650 651 sub iota { return reverse (1..$_[0]) } 652 653This function normally returns a list. Suppose you memoize it and 654merge the caches: 655 656 memoize 'iota', SCALAR_CACHE => 'MERGE'; 657 658 @i7 = iota(7); 659 $i7 = iota(7); 660 661Here the first call caches the list (1,2,3,4,5,6,7). The second call 662does not really make sense. C<Memoize> cannot guess what behavior 663C<iota> should have in scalar context without actually calling it in 664scalar context. Normally C<Memoize> I<would> call C<iota> in scalar 665context and cache the result, but the C<SCALAR_CACHE =E<gt> 'MERGE'> 666option says not to do that, but to use the cache list-context value 667instead. But it cannot return a list of seven elements in a scalar 668context. In this case C<$i7> will receive the B<first element> of the 669cached list value, namely 7. 670 671=head3 Merged disk caches 672 673Another use for C<MERGE> is when you want both kinds of return values 674stored in the same disk file; this saves you from having to deal with 675two disk files instead of one. You can use a normalizer function to 676keep the two sets of return values separate. For example: 677 678 local $MLDBM::UseDB = 'DB_File'; 679 tie my %cache => 'MLDBM', $filename, ...; 680 681 memoize 'myfunc', 682 NORMALIZER => 'n', 683 SCALAR_CACHE => [HASH => \%cache], 684 LIST_CACHE => 'MERGE', 685 ; 686 687 sub n { 688 my $context = wantarray() ? 'L' : 'S'; 689 # ... now compute the hash key from the arguments ... 690 $hashkey = "$context:$hashkey"; 691 } 692 693This normalizer function will store scalar context return values in 694the disk file under keys that begin with C<S:>, and list context 695return values under keys that begin with C<L:>. 696 697=head1 OTHER FACILITIES 698 699=head2 C<unmemoize> 700 701There's an C<unmemoize> function that you can import if you want to. 702Why would you want to? Here's an example: Suppose you have your cache 703tied to a DBM file, and you want to make sure that the cache is 704written out to disk if someone interrupts the program. If the program 705exits normally, this will happen anyway, but if someone types 706control-C or something then the program will terminate immediately 707without synchronizing the database. So what you can do instead is 708 709 $SIG{INT} = sub { unmemoize 'function' }; 710 711C<unmemoize> accepts a reference to, or the name of a previously 712memoized function, and undoes whatever it did to provide the memoized 713version in the first place, including making the name refer to the 714unmemoized version if appropriate. It returns a reference to the 715unmemoized version of the function. 716 717If you ask it to unmemoize a function that was never memoized, it 718croaks. 719 720=head2 C<flush_cache> 721 722C<flush_cache(function)> will flush out the caches, discarding I<all> 723the cached data. The argument may be a function name or a reference 724to a function. For finer control over when data is discarded or 725expired, see the documentation for C<Memoize::Expire>, included in 726this package. 727 728Note that if the cache is a tied hash, C<flush_cache> will attempt to 729invoke the C<CLEAR> method on the hash. If there is no C<CLEAR> 730method, this will cause a run-time error. 731 732An alternative approach to cache flushing is to use the C<HASH> option 733(see above) to request that C<Memoize> use a particular hash variable 734as its cache. Then you can examine or modify the hash at any time in 735any way you desire. You may flush the cache by using C<%hash = ()>. 736 737=head1 CAVEATS 738 739Memoization is not a cure-all: 740 741=over 4 742 743=item * 744 745Do not memoize a function whose behavior depends on program 746state other than its own arguments, such as global variables, the time 747of day, or file input. These functions will not produce correct 748results when memoized. For a particularly easy example: 749 750 sub f { 751 time; 752 } 753 754This function takes no arguments, and as far as C<Memoize> is 755concerned, it always returns the same result. C<Memoize> is wrong, of 756course, and the memoized version of this function will call C<time> once 757to get the current time, and it will return that same time 758every time you call it after that. 759 760=item * 761 762Do not memoize a function with side effects. 763 764 sub f { 765 my ($a, $b) = @_; 766 my $s = $a + $b; 767 print "$a + $b = $s.\n"; 768 } 769 770This function accepts two arguments, adds them, and prints their sum. 771Its return value is the number of characters it printed, but you 772probably didn't care about that. But C<Memoize> doesn't understand 773that. If you memoize this function, you will get the result you 774expect the first time you ask it to print the sum of 2 and 3, but 775subsequent calls will return 1 (the return value of 776C<print>) without actually printing anything. 777 778=item * 779 780Do not memoize a function that returns a data structure that is 781modified by its caller. 782 783Consider these functions: C<getusers> returns a list of users somehow, 784and then C<main> throws away the first user on the list and prints the 785rest: 786 787 sub main { 788 my $userlist = getusers(); 789 shift @$userlist; 790 foreach $u (@$userlist) { 791 print "User $u\n"; 792 } 793 } 794 795 sub getusers { 796 my @users; 797 # Do something to get a list of users; 798 \@users; # Return reference to list. 799 } 800 801If you memoize C<getusers> here, it will work right exactly once. The 802reference to the users list will be stored in the memo table. C<main> 803will discard the first element from the referenced list. The next 804time you invoke C<main>, C<Memoize> will not call C<getusers>; it will 805just return the same reference to the same list it got last time. But 806this time the list has already had its head removed; C<main> will 807erroneously remove another element from it. The list will get shorter 808and shorter every time you call C<main>. 809 810Similarly, this: 811 812 $u1 = getusers(); 813 $u2 = getusers(); 814 pop @$u1; 815 816will modify $u2 as well as $u1, because both variables are references 817to the same array. Had C<getusers> not been memoized, $u1 and $u2 818would have referred to different arrays. 819 820=item * 821 822Do not memoize a very simple function. 823 824Recently someone mentioned to me that the Memoize module made his 825program run slower instead of faster. It turned out that he was 826memoizing the following function: 827 828 sub square { 829 $_[0] * $_[0]; 830 } 831 832I pointed out that C<Memoize> uses a hash, and that looking up a 833number in the hash is necessarily going to take a lot longer than a 834single multiplication. There really is no way to speed up the 835C<square> function. 836 837Memoization is not magical. 838 839=back 840 841=head1 PERSISTENT CACHE SUPPORT 842 843You can tie the cache tables to any sort of tied hash that you want 844to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and 845C<EXISTS>. For example, 846 847 tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666; 848 memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 849 850works just fine. For some storage methods, you need a little glue. 851 852C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this 853package is a glue module called C<Memoize::SDBM_File> which does 854provide one. Use this instead of plain C<SDBM_File> to store your 855cache table on disk in an C<SDBM_File> database: 856 857 tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666; 858 memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 859 860C<NDBM_File> has the same problem and the same solution. (Use 861C<Memoize::NDBM_File instead of plain NDBM_File.>) 862 863C<Storable> isn't a tied hash class at all. You can use it to store a 864hash to disk and retrieve it again, but you can't modify the hash while 865it's on the disk. So if you want to store your cache table in a 866C<Storable> database, use C<Memoize::Storable>, which puts a hashlike 867front-end onto C<Storable>. The hash table is actually kept in 868memory, and is loaded from your C<Storable> file at the time you 869memoize the function, and stored back at the time you unmemoize the 870function (or when your program exits): 871 872 tie my %cache => 'Memoize::Storable', $filename; 873 memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 874 875 tie my %cache => 'Memoize::Storable', $filename, 'nstore'; 876 memoize 'function', SCALAR_CACHE => [HASH => \%cache]; 877 878Include the C<nstore> option to have the C<Storable> database written 879in I<network order>. (See L<Storable> for more details about this.) 880 881The C<flush_cache()> function will raise a run-time error unless the 882tied package provides a C<CLEAR> method. 883 884=head1 EXPIRATION SUPPORT 885 886See Memoize::Expire, which is a plug-in module that adds expiration 887functionality to Memoize. If you don't like the kinds of policies 888that Memoize::Expire implements, it is easy to write your own plug-in 889module to implement whatever policy you desire. Memoize comes with 890several examples. An expiration manager that implements a LRU policy 891is available on CPAN as Memoize::ExpireLRU. 892 893=head1 BUGS 894 895The test suite is much better, but always needs improvement. 896 897There is some problem with the way C<goto &f> works under threaded 898Perl, perhaps because of the lexical scoping of C<@_>. This is a bug 899in Perl, and until it is resolved, memoized functions will see a 900slightly different C<caller()> and will perform a little more slowly 901on threaded perls than unthreaded perls. 902 903Some versions of C<DB_File> won't let you store data under a key of 904length 0. That means that if you have a function C<f> which you 905memoized and the cache is in a C<DB_File> database, then the value of 906C<f()> (C<f> called with no arguments) will not be memoized. If this 907is a big problem, you can supply a normalizer function that prepends 908C<"x"> to every key. 909 910=head1 SEE ALSO 911 912At L<https://perl.plover.com/MiniMemoize/> there is an article about 913memoization and about the internals of Memoize that appeared in The 914Perl Journal, issue #13. 915 916Mark-Jason Dominus's book I<Higher-Order Perl> (2005, ISBN 1558607013, 917published 918by Morgan Kaufmann) discusses memoization (and many other 919topics) in tremendous detail. It is available on-line for free. 920For more information, visit L<https://hop.perl.plover.com/>. 921 922=head1 THANK YOU 923 924Many thanks to Florian Ragwitz for administration and packaging 925assistance, to John Tromp for bug reports, to Jonathan Roy for bug reports 926and suggestions, to Michael Schwern for other bug reports and patches, 927to Mike Cariaso for helping me to figure out the Right Thing to Do 928About Expiration, to Joshua Gerth, Joshua Chamas, Jonathan Roy 929(again), Mark D. Anderson, and Andrew Johnson for more suggestions 930about expiration, to Brent Powers for the Memoize::ExpireLRU module, 931to Ariel Scolnicov for delightful messages about the Fibonacci 932function, to Dion Almaer for thought-provoking suggestions about the 933default normalizer, to Walt Mankowski and Kurt Starsinic for much help 934investigating problems under threaded Perl, to Alex Dudkevich for 935reporting the bug in prototyped functions and for checking my patch, 936to Tony Bass for many helpful suggestions, to Jonathan Roy (again) for 937finding a use for C<unmemoize()>, to Philippe Verdret for enlightening 938discussion of C<Hook::PrePostCall>, to Nat Torkington for advice I 939ignored, to Chris Nandor for portability advice, to Randal Schwartz 940for suggesting the 'C<flush_cache> function, and to Jenda Krynicky for 941being a light in the world. 942 943Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including 944this module in the core and for his patient and helpful guidance 945during the integration process. 946 947=head1 AUTHOR 948 949Mark Jason Dominus 950 951=head1 COPYRIGHT AND LICENSE 952 953This software is copyright (c) 2012 by Mark Jason Dominus. 954 955This is free software; you can redistribute it and/or modify it under 956the same terms as the Perl 5 programming language system itself. 957 958=cut 959