xref: /openbsd/gnu/usr.bin/perl/cpan/Memoize/Memoize.pm (revision 274d7c50)
1# -*- mode: perl; perl-indent-level: 2; -*-
2# Memoize.pm
3#
4# Transparent memoization of idempotent functions
5#
6# Copyright 1998, 1999, 2000, 2001, 2012 M. J. Dominus.
7# You may copy and distribute this program under the
8# same terms as Perl itself.  If in doubt,
9# write to mjd-perl-memoize+@plover.com for a license.
10
11package Memoize;
12$VERSION = '1.03_01';
13
14# Compile-time constants
15sub SCALAR () { 0 }
16sub LIST () { 1 }
17
18
19#
20# Usage memoize(functionname/ref,
21#               { NORMALIZER => coderef, INSTALL => name,
22#                 LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
23#
24
25use Carp;
26use Exporter;
27use vars qw($DEBUG);
28use Config;                     # Dammit.
29@ISA = qw(Exporter);
30@EXPORT = qw(memoize);
31@EXPORT_OK = qw(unmemoize flush_cache);
32use strict;
33
34my %memotable;
35my %revmemotable;
36my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
37my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;
38
39# Raise an error if the user tries to specify one of thesepackage as a
40# tie for LIST_CACHE
41
42my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);
43
44sub memoize {
45  my $fn = shift;
46  my %options = @_;
47  my $options = \%options;
48
49  unless (defined($fn) &&
50	  (ref $fn eq 'CODE' || ref $fn eq '')) {
51    croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
52  }
53
54  my $uppack = caller;		# TCL me Elmo!
55  my $cref;			# Code reference to original function
56  my $name = (ref $fn ? undef : $fn);
57
58  # Convert function names to code references
59  $cref = &_make_cref($fn, $uppack);
60
61  # Locate function prototype, if any
62  my $proto = prototype $cref;
63  if (defined $proto) { $proto = "($proto)" }
64  else { $proto = "" }
65
66  # I would like to get rid of the eval, but there seems not to be any
67  # other way to set the prototype properly.  The switch here for
68  # 'usethreads' works around a bug in threadperl having to do with
69  # magic goto.  It would be better to fix the bug and use the magic
70  # goto version everywhere.
71  my $wrapper =
72      $Config{usethreads}
73        ? eval "sub $proto { &_memoizer(\$cref, \@_); }"
74        : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";
75
76  my $normalizer = $options{NORMALIZER};
77  if (defined $normalizer  && ! ref $normalizer) {
78    $normalizer = _make_cref($normalizer, $uppack);
79  }
80
81  my $install_name;
82  if (defined $options->{INSTALL}) {
83    # INSTALL => name
84    $install_name = $options->{INSTALL};
85  } elsif (! exists $options->{INSTALL}) {
86    # No INSTALL option provided; use original name if possible
87    $install_name = $name;
88  } else {
89    # INSTALL => undef  means don't install
90  }
91
92  if (defined $install_name) {
93    $install_name = $uppack . '::' . $install_name
94	unless $install_name =~ /::/;
95    no strict;
96    local($^W) = 0;	       # ``Subroutine $install_name redefined at ...''
97    *{$install_name} = $wrapper; # Install memoized version
98  }
99
100  $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key
101
102  # These will be the caches
103  my %caches;
104  for my $context (qw(SCALAR LIST)) {
105    # suppress subsequent 'uninitialized value' warnings
106    $options{"${context}_CACHE"} ||= '';
107
108    my $cache_opt = $options{"${context}_CACHE"};
109    my @cache_opt_args;
110    if (ref $cache_opt) {
111      @cache_opt_args = @$cache_opt;
112      $cache_opt = shift @cache_opt_args;
113    }
114    if ($cache_opt eq 'FAULT') { # no cache
115      $caches{$context} = undef;
116    } elsif ($cache_opt eq 'HASH') { # user-supplied hash
117      my $cache = $cache_opt_args[0];
118      my $package = ref(tied %$cache);
119      if ($context eq 'LIST' && $scalar_only{$package}) {
120        croak("You can't use $package for LIST_CACHE because it can only store scalars");
121      }
122      $caches{$context} = $cache;
123    } elsif ($cache_opt eq '' ||  $IS_CACHE_TAG{$cache_opt}) {
124      # default is that we make up an in-memory hash
125      $caches{$context} = {};
126      # (this might get tied later, or MERGEd away)
127    } else {
128      croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
129    }
130  }
131
132  # Perhaps I should check here that you didn't supply *both* merge
133  # options.  But if you did, it does do something reasonable: They
134  # both get merged to the same in-memory hash.
135  if ($options{SCALAR_CACHE} eq 'MERGE' || $options{LIST_CACHE} eq 'MERGE') {
136    $options{MERGED} = 1;
137    $caches{SCALAR} = $caches{LIST};
138  }
139
140  # Now deal with the TIE options
141  {
142    my $context;
143    foreach $context (qw(SCALAR LIST)) {
144      # If the relevant option wasn't `TIE', this call does nothing.
145      _my_tie($context, $caches{$context}, $options);  # Croaks on failure
146    }
147  }
148
149  # We should put some more stuff in here eventually.
150  # We've been saying that for serveral versions now.
151  # And you know what?  More stuff keeps going in!
152  $memotable{$cref} =
153  {
154    O => $options,  # Short keys here for things we need to access frequently
155    N => $normalizer,
156    U => $cref,
157    MEMOIZED => $wrapper,
158    PACKAGE => $uppack,
159    NAME => $install_name,
160    S => $caches{SCALAR},
161    L => $caches{LIST},
162  };
163
164  $wrapper			# Return just memoized version
165}
166
167# This function tries to load a tied hash class and tie the hash to it.
168sub _my_tie {
169  my ($context, $hash, $options) = @_;
170  my $fullopt = $options->{"${context}_CACHE"};
171
172  # We already checked to make sure that this works.
173  my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;
174
175  return unless defined $shortopt && $shortopt eq 'TIE';
176  carp("TIE option to memoize() is deprecated; use HASH instead")
177      if $^W;
178
179  my @args = ref $fullopt ? @$fullopt : ();
180  shift @args;
181  my $module = shift @args;
182  if ($context eq 'LIST' && $scalar_only{$module}) {
183    croak("You can't use $module for LIST_CACHE because it can only store scalars");
184  }
185  my $modulefile = $module . '.pm';
186  $modulefile =~ s{::}{/}g;
187  eval {
188    local @INC = @INC;
189    pop @INC if $INC[-1] eq '.';
190    require $modulefile
191  };
192  if ($@) {
193    croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
194  }
195  my $rc = (tie %$hash => $module, @args);
196  unless ($rc) {
197    croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
198  }
199  1;
200}
201
202sub flush_cache {
203  my $func = _make_cref($_[0], scalar caller);
204  my $info = $memotable{$revmemotable{$func}};
205  die "$func not memoized" unless defined $info;
206  for my $context (qw(S L)) {
207    my $cache = $info->{$context};
208    if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
209      my $funcname = defined($info->{NAME}) ?
210          "function $info->{NAME}" : "anonymous function $func";
211      my $context = {S => 'scalar', L => 'list'}->{$context};
212      croak "Tied cache hash for $context-context $funcname does not support flushing";
213    } else {
214      %$cache = ();
215    }
216  }
217}
218
219# This is the function that manages the memo tables.
220sub _memoizer {
221  my $orig = shift;		# stringized version of ref to original func.
222  my $info = $memotable{$orig};
223  my $normalizer = $info->{N};
224
225  my $argstr;
226  my $context = (wantarray() ? LIST : SCALAR);
227
228  if (defined $normalizer) {
229    no strict;
230    if ($context == SCALAR) {
231      $argstr = &{$normalizer}(@_);
232    } elsif ($context == LIST) {
233      ($argstr) = &{$normalizer}(@_);
234    } else {
235      croak "Internal error \#41; context was neither LIST nor SCALAR\n";
236    }
237  } else {                      # Default normalizer
238    local $^W = 0;
239    $argstr = join chr(28),@_;
240  }
241
242  if ($context == SCALAR) {
243    my $cache = $info->{S};
244    _crap_out($info->{NAME}, 'scalar') unless $cache;
245    if (exists $cache->{$argstr}) {
246      return $info->{O}{MERGED}
247        ? $cache->{$argstr}[0] : $cache->{$argstr};
248    } else {
249      my $val = &{$info->{U}}(@_);
250      # Scalars are considered to be lists; store appropriately
251      if ($info->{O}{MERGED}) {
252	$cache->{$argstr} = [$val];
253      } else {
254	$cache->{$argstr} = $val;
255      }
256      $val;
257    }
258  } elsif ($context == LIST) {
259    my $cache = $info->{L};
260    _crap_out($info->{NAME}, 'list') unless $cache;
261    if (exists $cache->{$argstr}) {
262      return @{$cache->{$argstr}};
263    } else {
264      my @q = &{$info->{U}}(@_);
265      $cache->{$argstr} = \@q;
266      @q;
267    }
268  } else {
269    croak "Internal error \#42; context was neither LIST nor SCALAR\n";
270  }
271}
272
273sub unmemoize {
274  my $f = shift;
275  my $uppack = caller;
276  my $cref = _make_cref($f, $uppack);
277
278  unless (exists $revmemotable{$cref}) {
279    croak "Could not unmemoize function `$f', because it was not memoized to begin with";
280  }
281
282  my $tabent = $memotable{$revmemotable{$cref}};
283  unless (defined $tabent) {
284    croak "Could not figure out how to unmemoize function `$f'";
285  }
286  my $name = $tabent->{NAME};
287  if (defined $name) {
288    no strict;
289    local($^W) = 0;	       # ``Subroutine $install_name redefined at ...''
290    *{$name} = $tabent->{U}; # Replace with original function
291  }
292  undef $memotable{$revmemotable{$cref}};
293  undef $revmemotable{$cref};
294
295  # This removes the last reference to the (possibly tied) memo tables
296  # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'};
297  # undef $tabent;
298
299#  # Untie the memo tables if they were tied.
300#  my $i;
301#  for $i (0,1) {
302#    if (tied %{$memotabs->[$i]}) {
303#      warn "Untying hash #$i\n";
304#      untie %{$memotabs->[$i]};
305#    }
306#  }
307
308  $tabent->{U};
309}
310
311sub _make_cref {
312  my $fn = shift;
313  my $uppack = shift;
314  my $cref;
315  my $name;
316
317  if (ref $fn eq 'CODE') {
318    $cref = $fn;
319  } elsif (! ref $fn) {
320    if ($fn =~ /::/) {
321      $name = $fn;
322    } else {
323      $name = $uppack . '::' . $fn;
324    }
325    no strict;
326    if (defined $name and !defined(&$name)) {
327      croak "Cannot operate on nonexistent function `$fn'";
328    }
329#    $cref = \&$name;
330    $cref = *{$name}{CODE};
331  } else {
332    my $parent = (caller(1))[3]; # Function that called _make_cref
333    croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
334  }
335  $DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
336  $cref;
337}
338
339sub _crap_out {
340  my ($funcname, $context) = @_;
341  if (defined $funcname) {
342    croak "Function `$funcname' called in forbidden $context context; faulting";
343  } else {
344    croak "Anonymous function called in forbidden $context context; faulting";
345  }
346}
347
3481;
349
350
351
352
353
354=head1 NAME
355
356Memoize - Make functions faster by trading space for time
357
358=head1 SYNOPSIS
359
360        # This is the documentation for Memoize 1.03
361	use Memoize;
362	memoize('slow_function');
363	slow_function(arguments);    # Is faster than it was before
364
365
366This is normally all you need to know.  However, many options are available:
367
368	memoize(function, options...);
369
370Options include:
371
372	NORMALIZER => function
373	INSTALL => new_name
374
375	SCALAR_CACHE => 'MEMORY'
376        SCALAR_CACHE => ['HASH', \%cache_hash ]
377	SCALAR_CACHE => 'FAULT'
378	SCALAR_CACHE => 'MERGE'
379
380	LIST_CACHE => 'MEMORY'
381        LIST_CACHE => ['HASH', \%cache_hash ]
382	LIST_CACHE => 'FAULT'
383	LIST_CACHE => 'MERGE'
384
385=head1 DESCRIPTION
386
387`Memoizing' a function makes it faster by trading space for time.  It
388does this by caching the return values of the function in a table.
389If you call the function again with the same arguments, C<memoize>
390jumps in and gives you the value out of the table, instead of letting
391the function compute the value all over again.
392
393Here is an extreme example.  Consider the Fibonacci sequence, defined
394by the following function:
395
396	# Compute Fibonacci numbers
397	sub fib {
398	  my $n = shift;
399	  return $n if $n < 2;
400	  fib($n-1) + fib($n-2);
401	}
402
403This function is very slow.  Why?  To compute fib(14), it first wants
404to compute fib(13) and fib(12), and add the results.  But to compute
405fib(13), it first has to compute fib(12) and fib(11), and then it
406comes back and computes fib(12) all over again even though the answer
407is the same.  And both of the times that it wants to compute fib(12),
408it has to compute fib(11) from scratch, and then it has to do it
409again each time it wants to compute fib(13).  This function does so
410much recomputing of old results that it takes a really long time to
411run---fib(14) makes 1,200 extra recursive calls to itself, to compute
412and recompute things that it already computed.
413
414This function is a good candidate for memoization.  If you memoize the
415`fib' function above, it will compute fib(14) exactly once, the first
416time it needs to, and then save the result in a table.  Then if you
417ask for fib(14) again, it gives you the result out of the table.
418While computing fib(14), instead of computing fib(12) twice, it does
419it once; the second time it needs the value it gets it from the table.
420It doesn't compute fib(11) four times; it computes it once, getting it
421from the table the next three times.  Instead of making 1,200
422recursive calls to `fib', it makes 15.  This makes the function about
423150 times faster.
424
425You could do the memoization yourself, by rewriting the function, like
426this:
427
428	# Compute Fibonacci numbers, memoized version
429	{ my @fib;
430  	  sub fib {
431	    my $n = shift;
432	    return $fib[$n] if defined $fib[$n];
433	    return $fib[$n] = $n if $n < 2;
434	    $fib[$n] = fib($n-1) + fib($n-2);
435	  }
436        }
437
438Or you could use this module, like this:
439
440	use Memoize;
441	memoize('fib');
442
443	# Rest of the fib function just like the original version.
444
445This makes it easy to turn memoizing on and off.
446
447Here's an even simpler example: I wrote a simple ray tracer; the
448program would look in a certain direction, figure out what it was
449looking at, and then convert the `color' value (typically a string
450like `red') of that object to a red, green, and blue pixel value, like
451this:
452
453    for ($direction = 0; $direction < 300; $direction++) {
454      # Figure out which object is in direction $direction
455      $color = $object->{color};
456      ($r, $g, $b) = @{&ColorToRGB($color)};
457      ...
458    }
459
460Since there are relatively few objects in a picture, there are only a
461few colors, which get looked up over and over again.  Memoizing
462C<ColorToRGB> sped up the program by several percent.
463
464=head1 DETAILS
465
466This module exports exactly one function, C<memoize>.  The rest of the
467functions in this package are None of Your Business.
468
469You should say
470
471	memoize(function)
472
473where C<function> is the name of the function you want to memoize, or
474a reference to it.  C<memoize> returns a reference to the new,
475memoized version of the function, or C<undef> on a non-fatal error.
476At present, there are no non-fatal errors, but there might be some in
477the future.
478
479If C<function> was the name of a function, then C<memoize> hides the
480old version and installs the new memoized version under the old name,
481so that C<&function(...)> actually invokes the memoized version.
482
483=head1 OPTIONS
484
485There are some optional options you can pass to C<memoize> to change
486the way it behaves a little.  To supply options, invoke C<memoize>
487like this:
488
489	memoize(function, NORMALIZER => function,
490			  INSTALL => newname,
491                          SCALAR_CACHE => option,
492	                  LIST_CACHE => option
493			 );
494
495Each of these options is optional; you can include some, all, or none
496of them.
497
498=head2 INSTALL
499
500If you supply a function name with C<INSTALL>, memoize will install
501the new, memoized version of the function under the name you give.
502For example,
503
504	memoize('fib', INSTALL => 'fastfib')
505
506installs the memoized version of C<fib> as C<fastfib>; without the
507C<INSTALL> option it would have replaced the old C<fib> with the
508memoized version.
509
510To prevent C<memoize> from installing the memoized version anywhere, use
511C<INSTALL =E<gt> undef>.
512
513=head2 NORMALIZER
514
515Suppose your function looks like this:
516
517	# Typical call: f('aha!', A => 11, B => 12);
518	sub f {
519	  my $a = shift;
520	  my %hash = @_;
521	  $hash{B} ||= 2;  # B defaults to 2
522	  $hash{C} ||= 7;  # C defaults to 7
523
524	  # Do something with $a, %hash
525	}
526
527Now, the following calls to your function are all completely equivalent:
528
529	f(OUCH);
530	f(OUCH, B => 2);
531	f(OUCH, C => 7);
532	f(OUCH, B => 2, C => 7);
533	f(OUCH, C => 7, B => 2);
534	(etc.)
535
536However, unless you tell C<Memoize> that these calls are equivalent,
537it will not know that, and it will compute the values for these
538invocations of your function separately, and store them separately.
539
540To prevent this, supply a C<NORMALIZER> function that turns the
541program arguments into a string in a way that equivalent arguments
542turn into the same string.  A C<NORMALIZER> function for C<f> above
543might look like this:
544
545	sub normalize_f {
546	  my $a = shift;
547	  my %hash = @_;
548	  $hash{B} ||= 2;
549	  $hash{C} ||= 7;
550
551	  join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
552	}
553
554Each of the argument lists above comes out of the C<normalize_f>
555function looking exactly the same, like this:
556
557	OUCH,B,2,C,7
558
559You would tell C<Memoize> to use this normalizer this way:
560
561	memoize('f', NORMALIZER => 'normalize_f');
562
563C<memoize> knows that if the normalized version of the arguments is
564the same for two argument lists, then it can safely look up the value
565that it computed for one argument list and return it as the result of
566calling the function with the other argument list, even if the
567argument lists look different.
568
569The default normalizer just concatenates the arguments with character
57028 in between.  (In ASCII, this is called FS or control-\.)  This
571always works correctly for functions with only one string argument,
572and also when the arguments never contain character 28.  However, it
573can confuse certain argument lists:
574
575	normalizer("a\034", "b")
576	normalizer("a", "\034b")
577	normalizer("a\034\034b")
578
579for example.
580
581Since hash keys are strings, the default normalizer will not
582distinguish between C<undef> and the empty string.  It also won't work
583when the function's arguments are references.  For example, consider a
584function C<g> which gets two arguments: A number, and a reference to
585an array of numbers:
586
587	g(13, [1,2,3,4,5,6,7]);
588
589The default normalizer will turn this into something like
590C<"13\034ARRAY(0x436c1f)">.  That would be all right, except that a
591subsequent array of numbers might be stored at a different location
592even though it contains the same data.  If this happens, C<Memoize>
593will think that the arguments are different, even though they are
594equivalent.  In this case, a normalizer like this is appropriate:
595
596	sub normalize { join ' ', $_[0], @{$_[1]} }
597
598For the example above, this produces the key "13 1 2 3 4 5 6 7".
599
600Another use for normalizers is when the function depends on data other
601than those in its arguments.  Suppose you have a function which
602returns a value which depends on the current hour of the day:
603
604	sub on_duty {
605          my ($problem_type) = @_;
606	  my $hour = (localtime)[2];
607          open my $fh, "$DIR/$problem_type" or die...;
608          my $line;
609          while ($hour-- > 0)
610            $line = <$fh>;
611          }
612	  return $line;
613	}
614
615At 10:23, this function generates the 10th line of a data file; at
6163:45 PM it generates the 15th line instead.  By default, C<Memoize>
617will only see the $problem_type argument.  To fix this, include the
618current hour in the normalizer:
619
620        sub normalize { join ' ', (localtime)[2], @_ }
621
622The calling context of the function (scalar or list context) is
623propagated to the normalizer.  This means that if the memoized
624function will treat its arguments differently in list context than it
625would in scalar context, you can have the normalizer function select
626its behavior based on the results of C<wantarray>.  Even if called in
627a list context, a normalizer should still return a single string.
628
629=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
630
631Normally, C<Memoize> caches your function's return values into an
632ordinary Perl hash variable.  However, you might like to have the
633values cached on the disk, so that they persist from one run of your
634program to the next, or you might like to associate some other
635interesting semantics with the cached values.
636
637There's a slight complication under the hood of C<Memoize>: There are
638actually I<two> caches, one for scalar values and one for list values.
639When your function is called in scalar context, its return value is
640cached in one hash, and when your function is called in list context,
641its value is cached in the other hash.  You can control the caching
642behavior of both contexts independently with these options.
643
644The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
645the following four strings:
646
647	MEMORY
648	FAULT
649	MERGE
650        HASH
651
652or else it must be a reference to an array whose first element is one of
653these four strings, such as C<[HASH, arguments...]>.
654
655=over 4
656
657=item C<MEMORY>
658
659C<MEMORY> means that return values from the function will be cached in
660an ordinary Perl hash variable.  The hash variable will not persist
661after the program exits.  This is the default.
662
663=item C<HASH>
664
665C<HASH> allows you to specify that a particular hash that you supply
666will be used as the cache.  You can tie this hash beforehand to give
667it any behavior you want.
668
669A tied hash can have any semantics at all.  It is typically tied to an
670on-disk database, so that cached values are stored in the database and
671retrieved from it again when needed, and the disk file typically
672persists after your program has exited.  See C<perltie> for more
673complete details about C<tie>.
674
675A typical example is:
676
677        use DB_File;
678        tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666;
679        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
680
681This has the effect of storing the cache in a C<DB_File> database
682whose name is in C<$filename>.  The cache will persist after the
683program has exited.  Next time the program runs, it will find the
684cache already populated from the previous run of the program.  Or you
685can forcibly populate the cache by constructing a batch program that
686runs in the background and populates the cache file.  Then when you
687come to run your real program the memoized function will be fast
688because all its results have been precomputed.
689
690Another reason to use C<HASH> is to provide your own hash variable.
691You can then inspect or modify the contents of the hash to gain finer
692control over the cache management.
693
694=item C<TIE>
695
696This option is no longer supported.  It is still documented only to
697aid in the debugging of old programs that use it.  Old programs should
698be converted to use the C<HASH> option instead.
699
700        memoize ... ['TIE', PACKAGE, ARGS...]
701
702is merely a shortcut for
703
704        require PACKAGE;
705	{ tie my %cache, PACKAGE, ARGS...;
706          memoize ... [HASH => \%cache];
707        }
708
709=item C<FAULT>
710
711C<FAULT> means that you never expect to call the function in scalar
712(or list) context, and that if C<Memoize> detects such a call, it
713should abort the program.  The error message is one of
714
715	`foo' function called in forbidden list context at line ...
716	`foo' function called in forbidden scalar context at line ...
717
718=item C<MERGE>
719
720C<MERGE> normally means that the memoized function does not
721distinguish between list and sclar context, and that return values in
722both contexts should be stored together.  Both C<LIST_CACHE =E<gt>
723MERGE> and C<SCALAR_CACHE =E<gt> MERGE> mean the same thing.
724
725Consider this function:
726
727	sub complicated {
728          # ... time-consuming calculation of $result
729          return $result;
730        }
731
732The C<complicated> function will return the same numeric C<$result>
733regardless of whether it is called in list or in scalar context.
734
735Normally, the following code will result in two calls to C<complicated>, even
736if C<complicated> is memoized:
737
738    $x = complicated(142);
739    ($y) = complicated(142);
740    $z = complicated(142);
741
742The first call will cache the result, say 37, in the scalar cache; the
743second will cach the list C<(37)> in the list cache.  The third call
744doesn't call the real C<complicated> function; it gets the value 37
745from the scalar cache.
746
747Obviously, the second call to C<complicated> is a waste of time, and
748storing its return value is a waste of space.  Specifying C<LIST_CACHE
749=E<gt> MERGE> will make C<memoize> use the same cache for scalar and
750list context return values, so that the second call uses the scalar
751cache that was populated by the first call.  C<complicated> ends up
752being called only once, and both subsequent calls return C<3> from the
753cache, regardless of the calling context.
754
755=head3 List values in scalar context
756
757Consider this function:
758
759    sub iota { return reverse (1..$_[0]) }
760
761This function normally returns a list.  Suppose you memoize it and
762merge the caches:
763
764    memoize 'iota', SCALAR_CACHE => 'MERGE';
765
766    @i7 = iota(7);
767    $i7 = iota(7);
768
769Here the first call caches the list (1,2,3,4,5,6,7).  The second call
770does not really make sense. C<Memoize> cannot guess what behavior
771C<iota> should have in scalar context without actually calling it in
772scalar context.  Normally C<Memoize> I<would> call C<iota> in scalar
773context and cache the result, but the C<SCALAR_CACHE =E<gt> 'MERGE'>
774option says not to do that, but to use the cache list-context value
775instead. But it cannot return a list of seven elements in a scalar
776context. In this case C<$i7> will receive the B<first element> of the
777cached list value, namely 7.
778
779=head3 Merged disk caches
780
781Another use for C<MERGE> is when you want both kinds of return values
782stored in the same disk file; this saves you from having to deal with
783two disk files instead of one.  You can use a normalizer function to
784keep the two sets of return values separate.  For example:
785
786        tie my %cache => 'MLDBM', 'DB_File', $filename, ...;
787
788	memoize 'myfunc',
789	  NORMALIZER => 'n',
790	  SCALAR_CACHE => [HASH => \%cache],
791	  LIST_CACHE => 'MERGE',
792	;
793
794	sub n {
795	  my $context = wantarray() ? 'L' : 'S';
796	  # ... now compute the hash key from the arguments ...
797	  $hashkey = "$context:$hashkey";
798	}
799
800This normalizer function will store scalar context return values in
801the disk file under keys that begin with C<S:>, and list context
802return values under keys that begin with C<L:>.
803
804=back
805
806=head1 OTHER FACILITIES
807
808=head2 C<unmemoize>
809
810There's an C<unmemoize> function that you can import if you want to.
811Why would you want to?  Here's an example: Suppose you have your cache
812tied to a DBM file, and you want to make sure that the cache is
813written out to disk if someone interrupts the program.  If the program
814exits normally, this will happen anyway, but if someone types
815control-C or something then the program will terminate immediately
816without synchronizing the database.  So what you can do instead is
817
818    $SIG{INT} = sub { unmemoize 'function' };
819
820C<unmemoize> accepts a reference to, or the name of a previously
821memoized function, and undoes whatever it did to provide the memoized
822version in the first place, including making the name refer to the
823unmemoized version if appropriate.  It returns a reference to the
824unmemoized version of the function.
825
826If you ask it to unmemoize a function that was never memoized, it
827croaks.
828
829=head2 C<flush_cache>
830
831C<flush_cache(function)> will flush out the caches, discarding I<all>
832the cached data.  The argument may be a function name or a reference
833to a function.  For finer control over when data is discarded or
834expired, see the documentation for C<Memoize::Expire>, included in
835this package.
836
837Note that if the cache is a tied hash, C<flush_cache> will attempt to
838invoke the C<CLEAR> method on the hash.  If there is no C<CLEAR>
839method, this will cause a run-time error.
840
841An alternative approach to cache flushing is to use the C<HASH> option
842(see above) to request that C<Memoize> use a particular hash variable
843as its cache.  Then you can examine or modify the hash at any time in
844any way you desire.  You may flush the cache by using C<%hash = ()>.
845
846=head1 CAVEATS
847
848Memoization is not a cure-all:
849
850=over 4
851
852=item *
853
854Do not memoize a function whose behavior depends on program
855state other than its own arguments, such as global variables, the time
856of day, or file input.  These functions will not produce correct
857results when memoized.  For a particularly easy example:
858
859	sub f {
860	  time;
861	}
862
863This function takes no arguments, and as far as C<Memoize> is
864concerned, it always returns the same result.  C<Memoize> is wrong, of
865course, and the memoized version of this function will call C<time> once
866to get the current time, and it will return that same time
867every time you call it after that.
868
869=item *
870
871Do not memoize a function with side effects.
872
873	sub f {
874	  my ($a, $b) = @_;
875          my $s = $a + $b;
876	  print "$a + $b = $s.\n";
877	}
878
879This function accepts two arguments, adds them, and prints their sum.
880Its return value is the numuber of characters it printed, but you
881probably didn't care about that.  But C<Memoize> doesn't understand
882that.  If you memoize this function, you will get the result you
883expect the first time you ask it to print the sum of 2 and 3, but
884subsequent calls will return 1 (the return value of
885C<print>) without actually printing anything.
886
887=item *
888
889Do not memoize a function that returns a data structure that is
890modified by its caller.
891
892Consider these functions:  C<getusers> returns a list of users somehow,
893and then C<main> throws away the first user on the list and prints the
894rest:
895
896	sub main {
897	  my $userlist = getusers();
898	  shift @$userlist;
899	  foreach $u (@$userlist) {
900	    print "User $u\n";
901	  }
902	}
903
904	sub getusers {
905	  my @users;
906	  # Do something to get a list of users;
907	  \@users;  # Return reference to list.
908	}
909
910If you memoize C<getusers> here, it will work right exactly once.  The
911reference to the users list will be stored in the memo table.  C<main>
912will discard the first element from the referenced list.  The next
913time you invoke C<main>, C<Memoize> will not call C<getusers>; it will
914just return the same reference to the same list it got last time.  But
915this time the list has already had its head removed; C<main> will
916erroneously remove another element from it.  The list will get shorter
917and shorter every time you call C<main>.
918
919Similarly, this:
920
921	$u1 = getusers();
922	$u2 = getusers();
923	pop @$u1;
924
925will modify $u2 as well as $u1, because both variables are references
926to the same array.  Had C<getusers> not been memoized, $u1 and $u2
927would have referred to different arrays.
928
929=item *
930
931Do not memoize a very simple function.
932
933Recently someone mentioned to me that the Memoize module made his
934program run slower instead of faster.  It turned out that he was
935memoizing the following function:
936
937    sub square {
938      $_[0] * $_[0];
939    }
940
941I pointed out that C<Memoize> uses a hash, and that looking up a
942number in the hash is necessarily going to take a lot longer than a
943single multiplication.  There really is no way to speed up the
944C<square> function.
945
946Memoization is not magical.
947
948=back
949
950=head1 PERSISTENT CACHE SUPPORT
951
952You can tie the cache tables to any sort of tied hash that you want
953to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
954C<EXISTS>.  For example,
955
956        tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666;
957        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
958
959works just fine.  For some storage methods, you need a little glue.
960
961C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
962package is a glue module called C<Memoize::SDBM_File> which does
963provide one.  Use this instead of plain C<SDBM_File> to store your
964cache table on disk in an C<SDBM_File> database:
965
966        tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666;
967        memoize 'function', SCALAR_CACHE => [HASH => \%cache];
968
969C<NDBM_File> has the same problem and the same solution.  (Use
970C<Memoize::NDBM_File instead of plain NDBM_File.>)
971
972C<Storable> isn't a tied hash class at all.  You can use it to store a
973hash to disk and retrieve it again, but you can't modify the hash while
974it's on the disk.  So if you want to store your cache table in a
975C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
976front-end onto C<Storable>.  The hash table is actually kept in
977memory, and is loaded from your C<Storable> file at the time you
978memoize the function, and stored back at the time you unmemoize the
979function (or when your program exits):
980
981        tie my %cache => 'Memoize::Storable', $filename;
982	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
983
984        tie my %cache => 'Memoize::Storable', $filename, 'nstore';
985	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
986
987Include the `nstore' option to have the C<Storable> database written
988in `network order'.  (See L<Storable> for more details about this.)
989
990The C<flush_cache()> function will raise a run-time error unless the
991tied package provides a C<CLEAR> method.
992
993=head1 EXPIRATION SUPPORT
994
995See Memoize::Expire, which is a plug-in module that adds expiration
996functionality to Memoize.  If you don't like the kinds of policies
997that Memoize::Expire implements, it is easy to write your own plug-in
998module to implement whatever policy you desire.  Memoize comes with
999several examples.  An expiration manager that implements a LRU policy
1000is available on CPAN as Memoize::ExpireLRU.
1001
1002=head1 BUGS
1003
1004The test suite is much better, but always needs improvement.
1005
1006There is some problem with the way C<goto &f> works under threaded
1007Perl, perhaps because of the lexical scoping of C<@_>.  This is a bug
1008in Perl, and until it is resolved, memoized functions will see a
1009slightly different C<caller()> and will perform a little more slowly
1010on threaded perls than unthreaded perls.
1011
1012Some versions of C<DB_File> won't let you store data under a key of
1013length 0.  That means that if you have a function C<f> which you
1014memoized and the cache is in a C<DB_File> database, then the value of
1015C<f()> (C<f> called with no arguments) will not be memoized.  If this
1016is a big problem, you can supply a normalizer function that prepends
1017C<"x"> to every key.
1018
1019=head1 MAILING LIST
1020
1021To join a very low-traffic mailing list for announcements about
1022C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
1023
1024=head1 AUTHOR
1025
1026Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co.
1027
1028See the C<Memoize.pm> Page at http://perl.plover.com/Memoize/
1029for news and upgrades.  Near this page, at
1030http://perl.plover.com/MiniMemoize/ there is an article about
1031memoization and about the internals of Memoize that appeared in The
1032Perl Journal, issue #13.  (This article is also included in the
1033Memoize distribution as `article.html'.)
1034
1035The author's book I<Higher-Order Perl> (2005, ISBN 1558607013, published
1036by Morgan Kaufmann) discusses memoization (and many other
1037topics) in tremendous detail. It is available on-line for free.
1038For more information, visit http://hop.perl.plover.com/ .
1039
1040To join a mailing list for announcements about C<Memoize>, send an
1041empty message to C<mjd-perl-memoize-request@plover.com>.  This mailing
1042list is for announcements only and has extremely low traffic---fewer than
1043two messages per year.
1044
1045=head1 COPYRIGHT AND LICENSE
1046
1047Copyright 1998, 1999, 2000, 2001, 2012  by Mark Jason Dominus
1048
1049This library is free software; you may redistribute it and/or modify
1050it under the same terms as Perl itself.
1051
1052=head1 THANK YOU
1053
1054Many thanks to Florian Ragwitz for administration and packaging
1055assistance, to John Tromp for bug reports, to Jonathan Roy for bug reports
1056and suggestions, to Michael Schwern for other bug reports and patches,
1057to Mike Cariaso for helping me to figure out the Right Thing to Do
1058About Expiration, to Joshua Gerth, Joshua Chamas, Jonathan Roy
1059(again), Mark D. Anderson, and Andrew Johnson for more suggestions
1060about expiration, to Brent Powers for the Memoize::ExpireLRU module,
1061to Ariel Scolnicov for delightful messages about the Fibonacci
1062function, to Dion Almaer for thought-provoking suggestions about the
1063default normalizer, to Walt Mankowski and Kurt Starsinic for much help
1064investigating problems under threaded Perl, to Alex Dudkevich for
1065reporting the bug in prototyped functions and for checking my patch,
1066to Tony Bass for many helpful suggestions, to Jonathan Roy (again) for
1067finding a use for C<unmemoize()>, to Philippe Verdret for enlightening
1068discussion of C<Hook::PrePostCall>, to Nat Torkington for advice I
1069ignored, to Chris Nandor for portability advice, to Randal Schwartz
1070for suggesting the 'C<flush_cache> function, and to Jenda Krynicky for
1071being a light in the world.
1072
1073Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
1074this module in the core and for his patient and helpful guidance
1075during the integration process.
1076
1077=cut
1078