1/*
2 *
3 * Copyright 2020 gRPC authors.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19// Package cache provides an LRU cache implementation to be used by the RLS LB
20// policy to cache RLS response data.
21package cache
22
23import (
24	"container/list"
25	"sync"
26	"time"
27
28	"google.golang.org/grpc/balancer"
29	"google.golang.org/grpc/grpclog"
30	"google.golang.org/grpc/internal/backoff"
31)
32
// logger is used for all logging from this package, under the "rls" component.
var logger = grpclog.Component("rls")
34
// Key represents the cache key used to uniquely identify a cache entry. All
// fields are comparable, which makes Key itself usable as a map key.
type Key struct {
	// Path is the full path of the incoming RPC request.
	Path string
	// KeyMap is a stringified version of the RLS request keys built using the
	// RLS keyBuilder. Since map is not a Type which is comparable in Go, it
	// cannot be part of the key for another map (the LRU cache is implemented
	// using a native map type).
	KeyMap string
}
45
// Entry wraps all the data to be stored in a cache entry.
type Entry struct {
	// Mu synchronizes access to this particular cache entry. The LB policy
	// will also hold another mutex to synchronize access to the cache as a
	// whole. To avoid holding the top-level mutex for the whole duration for
	// which one particular cache entry is acted upon, we use this entry mutex.
	Mu sync.Mutex
	// ExpiryTime is the absolute time at which the data cached as part of this
	// entry stops being valid. When an RLS request succeeds, this is set to
	// the current time plus the max_age field from the LB policy config. An
	// entry with this field in the past is not used to process picks.
	ExpiryTime time.Time
	// BackoffExpiryTime is the absolute time at which an entry which has gone
	// through backoff stops being valid.  When an RLS request fails, this is
	// set to the current time plus twice the backoff time. The cache expiry
	// timer will only delete entries for which both ExpiryTime and
	// BackoffExpiryTime are in the past.
	BackoffExpiryTime time.Time
	// StaleTime is the absolute time after which this entry will be
	// proactively refreshed if we receive a request for it. When an RLS
	// request succeeds, this is set to the current time plus the stale_age
	// from the LB policy config.
	StaleTime time.Time
	// BackoffTime is the absolute time at which the backoff period for this
	// entry ends. The backoff timer is setup with this value. No new RLS
	// requests are sent out for this entry until the backoff period ends.
	BackoffTime time.Time
	// EarliestEvictTime is the absolute time before which this entry should
	// not be evicted from the cache. This is set to a default value of 5
	// seconds when the entry is created. This is required to make sure that a
	// new entry added to the cache is not evicted before the RLS response
	// arrives (usually when the cache is too small).
	EarliestEvictTime time.Time
	// CallStatus stores the RPC status of the previous RLS request for this
	// entry. Picks for entries with a non-nil value for this field are failed
	// with the error stored here.
	CallStatus error
	// Backoff contains all backoff related state. When an RLS request
	// succeeds, backoff state is reset.
	Backoff BackoffState
	// HeaderData is received in an RLS response and is to be sent in the
	// X-Google-RLS-Data header for matching RPCs.
	HeaderData string
	// ChildPicker is a very thin wrapper around the child policy wrapper.
	// The type is declared as a Picker interface since the users of
	// the cache only care about the picker provided by the child policy, and
	// this makes it easy for testing.
	ChildPicker balancer.Picker

	// size stores the size of this cache entry. Uses only a subset of the
	// fields. See `entrySize` for how this is computed.
	size int64
	// key contains the cache key corresponding to this entry. This is required
	// from methods like `removeElement` which only have a pointer to the
	// list.Element which contains a reference to the cache.Entry. But these
	// methods need the cache.Key to be able to remove the entry from the
	// underlying map.
	key Key
}
105
// BackoffState wraps all backoff related state associated with a cache entry.
// It is embedded by value in Entry.
type BackoffState struct {
	// Retries keeps track of the number of RLS failures, to be able to
	// determine the amount of time to backoff before the next attempt.
	Retries int
	// Backoff is an exponential backoff implementation which returns the
	// amount of time to backoff, given the number of retries.
	Backoff backoff.Strategy
	// Timer fires when the backoff period ends and incoming requests after
	// this will trigger a new RLS request.
	Timer *time.Timer
	// Callback provided by the LB policy to be notified when the backoff timer
	// expires. This will trigger a new picker to be returned to the
	// ClientConn, to force queued up RPCs to be retried.
	Callback func()
}
122
// LRU is a cache with a least recently used eviction policy. It is not safe
// for concurrent access.
type LRU struct {
	// maxSize is the configured size limit; it may be exceeded momentarily
	// (see removeToFit).
	maxSize int64
	// usedSize is the sum of entrySize over all cached entries.
	usedSize int64
	// onEvicted, if non-nil, is invoked for every entry removed from the
	// cache.
	onEvicted func(Key, *Entry)

	// ll orders entries by recency of use: most recently used at the front,
	// eviction candidates at the back. Element values are of type *Entry.
	ll *list.List
	// cache maps a Key to its element in ll for O(1) lookup.
	cache map[Key]*list.Element
}
133
134// NewLRU creates a cache.LRU with a size limit of maxSize and the provided
135// eviction callback.
136//
137// Currently, only the cache.Key and the HeaderData field from cache.Entry
138// count towards the size of the cache (other overhead per cache entry is not
139// counted). The cache could temporarily exceed the configured maxSize because
140// we want the entries to spend a configured minimum amount of time in the
141// cache before they are LRU evicted (so that all the work performed in sending
142// an RLS request and caching the response is not a total waste).
143//
144// The provided onEvited callback must not attempt to re-add the entry inline
145// and the RLS LB policy does not have a need to do that.
146//
147// The cache package trusts the RLS policy (its only user) to supply a default
148// minimum non-zero maxSize, in the event that the ServiceConfig does not
149// provide a value for it.
150func NewLRU(maxSize int64, onEvicted func(Key, *Entry)) *LRU {
151	return &LRU{
152		maxSize:   maxSize,
153		onEvicted: onEvicted,
154		ll:        list.New(),
155		cache:     make(map[Key]*list.Element),
156	}
157}
158
// Resize sets the size limit of the LRU to newMaxSize and removes older
// entries, if required, to comply with the new limit.
func (lru *LRU) Resize(newMaxSize int64) {
	lru.maxSize = newMaxSize
	// A newSize of 0 adds no room for a new entry; this simply evicts from
	// the back of the list until usedSize fits within the new limit.
	lru.removeToFit(0)
}
165
166// TODO(easwars): If required, make this function more sophisticated.
167func entrySize(key Key, value *Entry) int64 {
168	return int64(len(key.Path) + len(key.KeyMap) + len(value.HeaderData))
169}
170
171// removeToFit removes older entries from the cache to make room for a new
172// entry of size newSize.
173func (lru *LRU) removeToFit(newSize int64) {
174	now := time.Now()
175	for lru.usedSize+newSize > lru.maxSize {
176		elem := lru.ll.Back()
177		if elem == nil {
178			// This is a corner case where the cache is empty, but the new entry
179			// to be added is bigger than maxSize.
180			logger.Info("rls: newly added cache entry exceeds cache maxSize")
181			return
182		}
183
184		entry := elem.Value.(*Entry)
185		if t := entry.EarliestEvictTime; !t.IsZero() && t.Before(now) {
186			// When the oldest entry is too new (it hasn't even spent a default
187			// minimum amount of time in the cache), we abort and allow the
188			// cache to grow bigger than the configured maxSize.
189			logger.Info("rls: LRU eviction finds oldest entry to be too new. Allowing cache to exceed maxSize momentarily")
190			return
191		}
192		lru.removeElement(elem)
193	}
194}
195
196// Add adds a new entry to the cache.
197func (lru *LRU) Add(key Key, value *Entry) {
198	size := entrySize(key, value)
199	elem, ok := lru.cache[key]
200	if !ok {
201		lru.removeToFit(size)
202		lru.usedSize += size
203		value.size = size
204		value.key = key
205		elem := lru.ll.PushFront(value)
206		lru.cache[key] = elem
207		return
208	}
209
210	existing := elem.Value.(*Entry)
211	sizeDiff := size - existing.size
212	lru.removeToFit(sizeDiff)
213	value.size = size
214	elem.Value = value
215	lru.ll.MoveToFront(elem)
216	lru.usedSize += sizeDiff
217}
218
219// Remove removes a cache entry wth key key, if one exists.
220func (lru *LRU) Remove(key Key) {
221	if elem, ok := lru.cache[key]; ok {
222		lru.removeElement(elem)
223	}
224}
225
226func (lru *LRU) removeElement(e *list.Element) {
227	entry := e.Value.(*Entry)
228	lru.ll.Remove(e)
229	delete(lru.cache, entry.key)
230	lru.usedSize -= entry.size
231	if lru.onEvicted != nil {
232		lru.onEvicted(entry.key, entry)
233	}
234}
235
236// Get returns a cache entry with key key.
237func (lru *LRU) Get(key Key) *Entry {
238	elem, ok := lru.cache[key]
239	if !ok {
240		return nil
241	}
242	lru.ll.MoveToFront(elem)
243	return elem.Value.(*Entry)
244}
245