1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 #include <sys/spa.h>
23 #include <sys/zio.h>
24 #include <sys/spa_impl.h>
25 #include <sys/counter.h>
26 #include <sys/zio_compress.h>
27 #include <sys/zio_checksum.h>
28 #include <sys/zfs_context.h>
29 #include <sys/arc.h>
30 #include <sys/zfs_refcount.h>
31 #include <sys/vdev.h>
32 #include <sys/vdev_trim.h>
33 #include <sys/vdev_impl.h>
34 #include <sys/dsl_pool.h>
35 #include <sys/zio_checksum.h>
36 #include <sys/multilist.h>
37 #include <sys/abd.h>
38 #include <sys/zil.h>
39 #include <sys/fm/fs/zfs.h>
40 #include <sys/eventhandler.h>
41 #include <sys/callb.h>
42 #include <sys/kstat.h>
43 #include <sys/zthr.h>
44 #include <zfs_fletcher.h>
45 #include <sys/arc_impl.h>
46 #include <sys/sdt.h>
47 #include <sys/aggsum.h>
48 #include <sys/vnode.h>
49 #include <cityhash.h>
50 #include <machine/vmparam.h>
51 #include <sys/vm.h>
52 #include <sys/vmmeter.h>
53 
54 #if __FreeBSD_version >= 1300139
55 static struct sx arc_vnlru_lock;
56 static struct vnode *arc_vnlru_marker;
57 #endif
58 
59 extern struct vfsops zfs_vfsops;
60 
61 uint_t zfs_arc_free_target = 0;
62 
63 static void
64 arc_free_target_init(void *unused __unused)
65 {
66 	zfs_arc_free_target = vm_cnt.v_free_target;
67 }
68 SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
69     arc_free_target_init, NULL);
70 
71 /*
72  * We don't have a tunable for arc_free_target due to the dependency on
73  * pagedaemon initialisation.
74  */
75 static int
76 sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
77 {
78 	uint_t val;
79 	int err;
80 
81 	val = zfs_arc_free_target;
82 	err = sysctl_handle_int(oidp, &val, 0, req);
83 	if (err != 0 || req->newptr == NULL)
84 		return (err);
85 
86 	if (val < minfree)
87 		return (EINVAL);
88 	if (val > vm_cnt.v_page_count)
89 		return (EINVAL);
90 
91 	zfs_arc_free_target = val;
92 
93 	return (0);
94 }
95 SYSCTL_DECL(_vfs_zfs);
96 /* BEGIN CSTYLED */
97 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
98     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint_t),
99     sysctl_vfs_zfs_arc_free_target, "IU",
100     "Desired number of free pages below which ARC triggers reclaim");
101 /* END CSTYLED */
102 
103 int64_t
104 arc_available_memory(void)
105 {
106 	int64_t lowest = INT64_MAX;
107 	int64_t n __unused;
108 
109 	/*
110 	 * Cooperate with pagedaemon when it's time for it to scan
111 	 * and reclaim some pages.
112 	 */
113 	n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
114 	if (n < lowest) {
115 		lowest = n;
116 	}
117 #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
118 	/*
119 	 * If we're on an i386 platform, it's possible that we'll exhaust the
120 	 * kernel heap space before we ever run out of available physical
121 	 * memory.  Most checks of the size of the heap_area compare against
122 	 * tune.t_minarmem, which is the minimum available real memory that we
123 	 * can have in the system.  However, this is generally fixed at 25 pages
124 	 * which is so low that it's useless.  In this comparison, we seek to
125 	 * calculate the total heap-size, and reclaim if more than 3/4ths of the
126 	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
127 	 * free)
128 	 */
129 	n = uma_avail() - (long)(uma_limit() / 4);
130 	if (n < lowest) {
131 		lowest = n;
132 	}
133 #endif
134 
135 	DTRACE_PROBE1(arc__available_memory, int64_t, lowest);
136 	return (lowest);
137 }
138 
/*
 * Compute the default upper bound for the ARC from the amount of physical
 * memory.
 *
 *   min    - fallback size used when the machine has less than 1 GiB
 *   allmem - total physical memory, in bytes
 *
 * The result is the larger of
 *   - five eighths of allmem, and
 *   - allmem minus 1 GiB (or min, when allmem is below 1 GiB).
 */
uint64_t
arc_default_max(uint64_t min, uint64_t allmem)
{
	const uint64_t five_eighths = allmem * 5 / 8;
	uint64_t all_but_gib;

	all_but_gib = (allmem >= 1 << 30) ? allmem - (1 << 30) : min;

	if (five_eighths > all_but_gib)
		return (five_eighths);
	return (all_but_gib);
}
153 
154 /*
155  * Helper function for arc_prune_async() it is responsible for safely
156  * handling the execution of a registered arc_prune_func_t.
157  */
158 static void
159 arc_prune_task(void *arg)
160 {
161 	int64_t nr_scan = (intptr_t)arg;
162 
163 	arc_reduce_target_size(ptob(nr_scan));
164 #if __FreeBSD_version >= 1300139
165 	sx_xlock(&arc_vnlru_lock);
166 	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
167 	sx_xunlock(&arc_vnlru_lock);
168 #else
169 	vnlru_free(nr_scan, &zfs_vfsops);
170 #endif
171 }
172 
173 /*
174  * Notify registered consumers they must drop holds on a portion of the ARC
175  * buffered they reference.  This provides a mechanism to ensure the ARC can
176  * honor the arc_meta_limit and reclaim otherwise pinned ARC buffers.  This
177  * is analogous to dnlc_reduce_cache() but more generic.
178  *
179  * This operation is performed asynchronously so it may be safely called
180  * in the context of the arc_reclaim_thread().  A reference is taken here
181  * for each registered arc_prune_t and the arc_prune_task() is responsible
182  * for releasing it once the registered arc_prune_func_t has completed.
183  */
184 void
185 arc_prune_async(int64_t adjust)
186 {
187 
188 #ifndef __LP64__
189 	if (adjust > INTPTR_MAX)
190 		adjust = INTPTR_MAX;
191 #endif
192 	taskq_dispatch(arc_prune_taskq, arc_prune_task,
193 	    (void *)(intptr_t)adjust, TQ_SLEEP);
194 	ARCSTAT_BUMP(arcstat_prune);
195 }
196 
197 uint64_t
198 arc_all_memory(void)
199 {
200 	return (ptob(physmem));
201 }
202 
203 int
204 arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
205 {
206 	return (0);
207 }
208 
209 uint64_t
210 arc_free_memory(void)
211 {
212 	return (ptob(freemem));
213 }
214 
215 static eventhandler_tag arc_event_lowmem = NULL;
216 
217 static void
218 arc_lowmem(void *arg __unused, int howto __unused)
219 {
220 	int64_t free_memory, to_free;
221 
222 	arc_no_grow = B_TRUE;
223 	arc_warm = B_TRUE;
224 	arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
225 	free_memory = arc_available_memory();
226 	to_free = (arc_c >> arc_shrink_shift) - MIN(free_memory, 0);
227 	DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
228 	arc_reduce_target_size(to_free);
229 
230 	/*
231 	 * It is unsafe to block here in arbitrary threads, because we can come
232 	 * here from ARC itself and may hold ARC locks and thus risk a deadlock
233 	 * with ARC reclaim thread.
234 	 */
235 	if (curproc == pageproc)
236 		arc_wait_for_eviction(to_free, B_FALSE);
237 }
238 
239 void
240 arc_lowmem_init(void)
241 {
242 	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
243 	    EVENTHANDLER_PRI_FIRST);
244 #if __FreeBSD_version >= 1300139
245 	arc_vnlru_marker = vnlru_alloc_marker();
246 	sx_init(&arc_vnlru_lock, "arc vnlru lock");
247 #endif
248 }
249 
250 void
251 arc_lowmem_fini(void)
252 {
253 	if (arc_event_lowmem != NULL)
254 		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
255 #if __FreeBSD_version >= 1300139
256 	if (arc_vnlru_marker != NULL) {
257 		vnlru_free_marker(arc_vnlru_marker);
258 		sx_destroy(&arc_vnlru_lock);
259 	}
260 #endif
261 }
262 
/*
 * Memory-hotplug registration hook.  This implementation has nothing to
 * register, so the body is intentionally empty.
 */
void
arc_register_hotplug(void)
{
	/* Nothing to do. */
}
267 
/*
 * Memory-hotplug deregistration hook.  This implementation has nothing to
 * deregister, so the body is intentionally empty.
 */
void
arc_unregister_hotplug(void)
{
	/* Nothing to do. */
}
272