// SPDX-License-Identifier: GPL-2.0
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2010, 2017, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ldlm/ldlm_pool.c
 *
 * Author: Yury Umanets <umka@clusterfs.com>
 */

/*
 * The idea behind this code is rather simple. Every second, for each server
 * namespace we compute SLV - the server lock volume - from the current number
 * of granted locks, the grant speed over the past period, etc.; that is, from
 * the locking load. For simplicity, the SLV number can be thought of as a flow
 * definition. It is sent to clients at every opportunity to let them know the
 * current load situation on the server. Initially, the SLV on the server is
 * set to a maximum value computed as follows: allow one client to hold all
 * ->pl_limit locks for 10 hours.
 *
 * On clients, the number of cached locks is no longer limited artificially as
 * it was before. Instead, the client calculates a CLV - client lock volume -
 * for each lock and compares it with the last SLV received from the server.
 * CLV is calculated as the number of locks in the LRU * lock live time in
 * seconds. If CLV > SLV, the lock is canceled.
 *
 * The client also has an LVF - lock volume factor - which regulates how
 * sensitive the client is to the last SLV from the server. The higher the LVF,
 * the more locks are canceled on the client. Its default value is 1; setting
 * LVF to 2 makes the client cancel locks twice as fast.
 *
 * Locks on a client are canceled more aggressively when:
 * (1) SLV is smaller, that is, the load on the server is higher;
 * (2) the client holds many locks (the more locks a client holds, the higher
 *     the chance that some of them should be canceled);
 * (3) the client has old locks (taken some time ago).
 *
 * Thus, in the flow paradigm we use to make SLV easier to reason about, CLV is
 * the volume of a particle in the flow described by SLV. If the flow gets
 * thinner, more and more particles fall outside of it, and since particles are
 * locks, they should be canceled.
 *
 * The general idea belongs to Vitaly Fertman (vitaly@clusterfs.com).
 * Andreas Dilger (adilger@clusterfs.com) proposed a few nice ideas such as
 * using LVF and many cleanups. The flow definition that makes the logic easier
 * to understand belongs to Nikita Danilov (nikita@clusterfs.com), as do many
 * cleanups and fixes. Design and implementation are by Yury Umanets
 * (umka@clusterfs.com).
 *
 * Glossary for terms used:
 *
 * pl_limit - Number of allowed locks in pool. Applies to server and client
 * side (tunable);
 *
 * pl_granted - Number of granted locks (calculated);
 * pl_grant_rate - Number of granted locks for last T (calculated);
 * pl_cancel_rate - Number of canceled locks for last T (calculated);
 * pl_grant_speed - Grant speed (GR - CR) for last T (calculated);
 * pl_grant_plan - Planned number of granted locks for next T (calculated);
 * pl_server_lock_volume - Current server lock volume (calculated);
 *
 * As can be seen from the list above, there are a few tunables which may
 * significantly affect behavior. They can all be modified via sysfs. They also
 * make it possible to construct several pre-defined behavior policies. If none
 * of the predefined policies suits the workload at hand, a new one can be
 * "constructed" via the sysfs tunables.
 */
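
/*
 * A purely illustrative example of the comparison above (the numbers are made
 * up, not taken from any real workload): suppose the server last advertised
 * SLV = 3,000,000 and a client caches 1,000 locks in its LRU. A lock that has
 * lived 2,000 seconds yields CLV = 1,000 * 2,000 = 2,000,000 < SLV, so it may
 * stay cached; once it has lived 4,000 seconds, CLV = 4,000,000 > SLV and the
 * lock becomes a candidate for cancellation. LVF scales this comparison, so a
 * larger LVF makes locks reach the cancellation threshold sooner.
 */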

#define DEBUG_SUBSYSTEM S_LDLM

#include <lustre_dlm.h>
#include <cl_object.h>
#include <obd_class.h>
#include <obd_support.h>
#include "ldlm_internal.h"

/*
 * 50 ldlm locks for 1MB of RAM.
 */
#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50)
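
/*
 * Worked example (assuming a 4 KiB PAGE_SIZE, i.e. PAGE_SHIFT = 12, and 4 GiB
 * of RAM, i.e. NUM_CACHEPAGES = 2^20): NUM_CACHEPAGES >> (20 - 12) converts
 * pages to megabytes of RAM (2^20 >> 8 = 4096 MB), so LDLM_POOL_HOST_L works
 * out to 4096 * 50 = 204800 locks.
 */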

/*
 * Maximal possible grant step plan in %.
 */
#define LDLM_POOL_MAX_GSP (30)

/*
 * Minimal possible grant step plan in %.
 */
#define LDLM_POOL_MIN_GSP (1)

/*
 * This controls the speed of reaching LDLM_POOL_MAX_GSP
 * with increasing thread period.
 */
#define LDLM_POOL_GSP_STEP_SHIFT (2)

/*
 * LDLM_POOL_MAX_GSP% of all locks is the default grant plan (GP).
 */
#define LDLM_POOL_GP(L)   (((L) * LDLM_POOL_MAX_GSP) / 100)

/*
 * Max age for locks on clients (10 hours, in seconds).
 */
#define LDLM_POOL_MAX_AGE (36000)

/*
 * The granularity of SLV calculation.
 */
#define LDLM_POOL_SLV_SHIFT (10)

/* Divide @val by 2^@shift, rounding the result up when @round_up is set. */
static inline u64 dru(u64 val, u32 shift, int round_up)
{
	return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift;
}
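
/*
 * Example of the rounding helper above: dru(3000, 10, 0) = 3000 >> 10 = 2
 * (truncating divide by 1024), while dru(3000, 10, 1) = (3000 + 1023) >> 10 = 3
 * (divide by 1024, rounded up).
 */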

static inline u64 ldlm_pool_slv_max(u32 L)
{
	/*
	 * Allow one client to hold all locks for 10 hrs.
	 * The formula is: limit * 10h / 1 client.
	 */
	u64 lim = (u64)L * LDLM_POOL_MAX_AGE / 1;
	return lim;
}
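
/*
 * Example: with the LDLM_POOL_HOST_L value of 204800 locks computed above,
 * ldlm_pool_slv_max() yields 204800 * 36000 = 7,372,800,000 as the initial
 * (maximum) SLV, i.e. enough volume for one client to keep every lock for
 * 10 hours.
 */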

static inline u64 ldlm_pool_slv_min(u32 L)
{
	return 1;
}

enum {
	LDLM_POOL_FIRST_STAT = 0,
	LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,
	LDLM_POOL_GRANT_STAT,
	LDLM_POOL_CANCEL_STAT,
	LDLM_POOL_GRANT_RATE_STAT,
	LDLM_POOL_CANCEL_RATE_STAT,
	LDLM_POOL_GRANT_PLAN_STAT,
	LDLM_POOL_SLV_STAT,
	LDLM_POOL_SHRINK_REQTD_STAT,
	LDLM_POOL_SHRINK_FREED_STAT,
	LDLM_POOL_RECALC_STAT,
	LDLM_POOL_TIMING_STAT,
	LDLM_POOL_LAST_STAT
};

/**
 * Calculates the suggested grant_step in % of available locks for the passed
 * period @t. This is later used in grant_plan calculations.
 */
static inline int ldlm_pool_t2gsp(unsigned int t)
{
	/*
	 * This yields 1% grant step for anything below LDLM_POOL_GSP_STEP
	 * and up to 30% for anything higher than LDLM_POOL_GSP_STEP.
	 *
	 * How this affects execution:
	 *
	 * - for a thread period of 1s we get a grant_step of 1%, which is good
	 * from the pov of taking some load off the server and pushing it out
	 * to clients. A 1% grant_step means that the server will not allow
	 * clients to grab lots of locks in a short period of time while
	 * keeping all their old locks cached. Clients always have to give
	 * some locks back if they want to take new ones;
	 *
	 * - for a thread period of 10s (the default) we get 23%, which means
	 * that clients have enough room to take new locks without giving any
	 * back. All locks from this 23% which were not taken by clients in
	 * the current period contribute to SLV growth. SLV growing means more
	 * locks cached on clients until the limit or grant plan is reached.
	 */
	return LDLM_POOL_MAX_GSP -
		((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
		 (t >> LDLM_POOL_GSP_STEP_SHIFT));
}
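
/*
 * Working the formula above for the two periods mentioned in the comment:
 * t = 1:  30 - ((30 - 1) >> (1 >> 2))  = 30 - (29 >> 0) = 1%
 * t = 10: 30 - ((30 - 1) >> (10 >> 2)) = 30 - (29 >> 2) = 23%
 */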

/**
 * Recalculates next stats on passed @pl.
 *
 * \pre ->pl_lock is locked.
 */
static void ldlm_pool_recalc_stats(struct ldlm_pool *pl, timeout_t period)
{
	int grant_plan = pl->pl_grant_plan;
	u64 slv = pl->pl_server_lock_volume;
	int granted = atomic_read(&pl->pl_granted);
	int grant_rate = atomic_read(&pl->pl_grant_rate) / period;
	int cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;

	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
			    slv);
	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
			    granted);
	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
			    grant_rate);
	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
			    grant_plan);
	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
			    cancel_rate);
}
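
/*
 * Note that the rate counters accumulate raw counts between recalcs and are
 * divided by the elapsed @period here, so e.g. 500 grants and 200 cancels
 * observed over a 10 second period are logged as grant_rate = 50 locks/s and
 * cancel_rate = 20 locks/s.
 */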

/**
 * Sets SLV and Limit from container_of(pl, struct ldlm_namespace,
 * ns_pool)->ns_obd to the passed @pl.
 */
static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
{
	struct obd_device *obd;

	/*
	 * Get new SLV and Limit from the obd, which is updated by incoming
	 * RPCs.
	 */
	obd = container_of(pl, struct ldlm_namespace,
			   ns_pool)->ns_obd;
	read_lock(&obd->obd_pool_lock);
	pl->pl_server_lock_volume = obd->obd_pool_slv;
	atomic_set(&pl->pl_limit, obd->obd_pool_limit);
	read_unlock(&obd->obd_pool_lock);
}

/**
 * Recalculates client side pool @pl according to current SLV and Limit.
 */
static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
{
	timeout_t recalc_interval_sec;
	int ret;

	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
	if (recalc_interval_sec < pl->pl_recalc_period)
		return 0;

	spin_lock(&pl->pl_lock);
	/*
	 * Check if we need to recalc lists now.
	 */
	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
	if (recalc_interval_sec < pl->pl_recalc_period) {
		spin_unlock(&pl->pl_lock);
		return 0;
	}

	/*
	 * Make sure that pool knows last SLV and Limit from obd.
	 */
	ldlm_cli_pool_pop_slv(pl);

	spin_unlock(&pl->pl_lock);

	/*
	 * When canceling locks on the client we do not need to maintain sharp
	 * timing, we only want to cancel locks asap according to the new SLV.
	 * This may be called when SLV has changed a lot, which is why we do
	 * not take pl->pl_recalc_time into account here.
	 */
	ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool),
			      0, LCF_ASYNC, 0);

	spin_lock(&pl->pl_lock);
	/*
	 * Time of LRU resizing might be longer than period,
	 * so update after LRU resizing rather than before it.
	 */
	pl->pl_recalc_time = ktime_get_seconds();
	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
			    recalc_interval_sec);
	spin_unlock(&pl->pl_lock);
	return ret;
}

/**
 * This function is the main entry point for memory pressure handling on the
 * client side. Its main goal is to cancel some number of locks on the passed
 * @pl according to @nr and @gfp_mask.
 */
static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
				int nr, gfp_t gfp_mask)
{
	struct ldlm_namespace *ns;
	int unused;

	ns = container_of(pl, struct ldlm_namespace, ns_pool);

	/*
	 * Do not cancel locks in case lru resize is disabled for this ns.
	 */
	if (!ns_connect_lru_resize(ns))
		return 0;

	/*
	 * Make sure that pool knows last SLV and Limit from obd.
	 */
	spin_lock(&pl->pl_lock);
	ldlm_cli_pool_pop_slv(pl);
	spin_unlock(&pl->pl_lock);

	spin_lock(&ns->ns_lock);
	unused = ns->ns_nr_unused;
	spin_unlock(&ns->ns_lock);

	if (nr == 0)
		return (unused / 100) * sysctl_vfs_cache_pressure;
	else
		return ldlm_cancel_lru(ns, nr, LCF_ASYNC, 0);
}
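
/*
 * For the nr == 0 ("count only") case above: with 10000 unused locks and the
 * default sysctl_vfs_cache_pressure of 100, the shrinker is told that
 * (10000 / 100) * 100 = 10000 objects are freeable; raising vfs_cache_pressure
 * to 200 would double that estimate, making lock cancellation more aggressive
 * under memory pressure.
 */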

static const struct ldlm_pool_ops ldlm_cli_pool_ops = {
	.po_recalc = ldlm_cli_pool_recalc,
	.po_shrink = ldlm_cli_pool_shrink
};

/**
 * Pool recalc wrapper. Will call either client or server pool recalc callback
 * depending on which pool @pl is used.
 *
 * \retval	the time, in seconds (ktime_get_seconds() clock), at which
 *		this pool should next be recalculated
 */
static time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
{
	timeout_t recalc_interval_sec;
	int count;

	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
	if (recalc_interval_sec > 0) {
		spin_lock(&pl->pl_lock);
		recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;

		if (recalc_interval_sec > 0) {
			/*
			 * Update pool statistics every recalc interval.
			 */
			ldlm_pool_recalc_stats(pl, recalc_interval_sec);

			/*
			 * Zero out all rates and speed for the last period.
			 */
			atomic_set(&pl->pl_grant_rate, 0);
			atomic_set(&pl->pl_cancel_rate, 0);
		}
		spin_unlock(&pl->pl_lock);
	}

	if (pl->pl_ops->po_recalc) {
		count = pl->pl_ops->po_recalc(pl);
		lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
				    count);
	}

	return pl->pl_recalc_time + pl->pl_recalc_period;
}

/*
 * Pool shrink wrapper. Will call either client or server pool shrink callback
 * depending on which pool @pl is used. When @nr == 0, just return the number
 * of freeable locks. Otherwise, return the number of canceled locks.
 */
static int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask)
{
	int cancel = 0;

	if (pl->pl_ops->po_shrink) {
		cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask);
		if (nr > 0) {
			lprocfs_counter_add(pl->pl_stats,
					    LDLM_POOL_SHRINK_REQTD_STAT,
					    nr);
			lprocfs_counter_add(pl->pl_stats,
					    LDLM_POOL_SHRINK_FREED_STAT,
					    cancel);
			CDEBUG(D_DLMTRACE,
			       "%s: request to shrink %d locks, shrunk %d\n",
			       pl->pl_name, nr, cancel);
		}
	}
	return cancel;
}

static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
{
	int granted, grant_rate, cancel_rate;
	int grant_speed, lvf;
	struct ldlm_pool *pl = m->private;
	timeout_t period;
	u64 slv, clv;
	u32 limit;

	spin_lock(&pl->pl_lock);
	slv = pl->pl_server_lock_volume;
	clv = pl->pl_client_lock_volume;
	limit = atomic_read(&pl->pl_limit);
	granted = atomic_read(&pl->pl_granted);
	period = ktime_get_seconds() - pl->pl_recalc_time;
	if (period <= 0)
		period = 1;
	grant_rate = atomic_read(&pl->pl_grant_rate) / period;
	cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;
	grant_speed = grant_rate - cancel_rate;
	lvf = atomic_read(&pl->pl_lock_volume_factor);
	spin_unlock(&pl->pl_lock);

	seq_printf(m, "LDLM pool state (%s):\n"
		      "  SLV: %llu\n"
		      "  CLV: %llu\n"
		      "  LVF: %d\n",
		      pl->pl_name, slv, clv, (lvf * 100) >> 8);

	seq_printf(m, "  GR:  %d\n  CR:  %d\n  GS:  %d\n"
		      "  G:   %d\n  L:   %d\n",
		      grant_rate, cancel_rate, grant_speed,
		      granted, limit);

	return 0;
}

LDEBUGFS_SEQ_FOPS_RO(lprocfs_pool_state);
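
/*
 * For reference, the "state" file produced by the seq_show handler above looks
 * roughly like this (the pool name and all values are illustrative only):
 *
 *   LDLM pool state (ldlm-pool-lustre-OST0000-osc-example-0):
 *     SLV: 7372800000
 *     CLV: 58230
 *     LVF: 100
 *     GR:  12
 *     CR:  9
 *     GS:  3
 *     G:   3456
 *     L:   204800
 *
 * LVF is reported in percent ((lvf * 100) >> 8), GR/CR/GS are per-second
 * rates, G is the number of granted locks and L is the pool limit.
 */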

static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr,
				char *buf)
{
	struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
					    pl_kobj);
	int grant_speed;
	timeout_t period;

	spin_lock(&pl->pl_lock);
	/* serialize with ldlm_pool_recalc */
	period = ktime_get_seconds() - pl->pl_recalc_time;
	if (period <= 0)
		period = 1;
	grant_speed = (atomic_read(&pl->pl_grant_rate) -
		       atomic_read(&pl->pl_cancel_rate)) / period;
	spin_unlock(&pl->pl_lock);
	return sprintf(buf, "%d\n", grant_speed);
}
LUSTRE_RO_ATTR(grant_speed);

LDLM_POOL_SYSFS_READER_SHOW(grant_plan, int);
LUSTRE_RO_ATTR(grant_plan);

LDLM_POOL_SYSFS_READER_SHOW(recalc_period, int);
LDLM_POOL_SYSFS_WRITER_STORE(recalc_period, int);
LUSTRE_RW_ATTR(recalc_period);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64);
LUSTRE_RO_ATTR(server_lock_volume);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(client_lock_volume, u64);
LUSTRE_RO_ATTR(client_lock_volume);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic);
LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic);
LUSTRE_RW_ATTR(limit);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(granted, atomic);
LUSTRE_RO_ATTR(granted);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(cancel_rate, atomic);
LUSTRE_RO_ATTR(cancel_rate);

LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic);
LUSTRE_RO_ATTR(grant_rate);

static ssize_t lock_volume_factor_show(struct kobject *kobj,
				       struct attribute *attr,
				       char *buf)
{
	struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
	unsigned long tmp;

	tmp = (atomic_read(&pl->pl_lock_volume_factor) * 100) >> 8;
	return sprintf(buf, "%lu\n", tmp);
}

static ssize_t lock_volume_factor_store(struct kobject *kobj,
					struct attribute *attr,
					const char *buffer,
					size_t count)
{
	struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
	unsigned long tmp;
	int rc;

	rc = kstrtoul(buffer, 10, &tmp);
	if (rc < 0)
		return rc;

	tmp = (tmp << 8) / 100;
	atomic_set(&pl->pl_lock_volume_factor, tmp);

	return count;
}
LUSTRE_RW_ATTR(lock_volume_factor);
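
/*
 * The factor is kept as an 8-bit fixed-point value, so the store/show pair
 * above round-trips percentages: writing "200" stores (200 << 8) / 100 = 512
 * (i.e. a factor of 2.0), and reading back gives (512 * 100) >> 8 = 200.
 * The default of 1 << 8 = 256 therefore reads as 100.
 */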

static ssize_t recalc_time_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);

	return snprintf(buf, PAGE_SIZE, "%llu\n",
			ktime_get_seconds() - pl->pl_recalc_time);
}
LUSTRE_RO_ATTR(recalc_time);

/* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */
static struct attribute *ldlm_pl_attrs[] = {
	&lustre_attr_grant_speed.attr,
	&lustre_attr_grant_plan.attr,
	&lustre_attr_recalc_period.attr,
	&lustre_attr_server_lock_volume.attr,
	&lustre_attr_client_lock_volume.attr,
	&lustre_attr_recalc_time.attr,
	&lustre_attr_limit.attr,
	&lustre_attr_granted.attr,
	&lustre_attr_cancel_rate.attr,
	&lustre_attr_grant_rate.attr,
	&lustre_attr_lock_volume_factor.attr,
	NULL,
};

static void ldlm_pl_release(struct kobject *kobj)
{
	struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
					    pl_kobj);
	complete(&pl->pl_kobj_unregister);
}

static struct kobj_type ldlm_pl_ktype = {
	.default_attrs	= ldlm_pl_attrs,
	.sysfs_ops	= &lustre_sysfs_ops,
	.release	= ldlm_pl_release,
};

static int ldlm_pool_sysfs_init(struct ldlm_pool *pl)
{
	struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace,
						 ns_pool);
	int err;

	init_completion(&pl->pl_kobj_unregister);
	err = kobject_init_and_add(&pl->pl_kobj, &ldlm_pl_ktype, &ns->ns_kobj,
				   "pool");

	return err;
}

static int ldlm_pool_debugfs_init(struct ldlm_pool *pl)
{
	struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace,
						 ns_pool);
	struct dentry *debugfs_ns_parent;
	struct ldebugfs_vars pool_vars[2];
	int rc = 0;

	debugfs_ns_parent = ns->ns_debugfs_entry;
	if (IS_ERR_OR_NULL(debugfs_ns_parent)) {
		CERROR("%s: debugfs entry is not initialized\n",
		       ldlm_ns_name(ns));
		rc = -EINVAL;
		goto out;
	}
	pl->pl_debugfs_entry = debugfs_create_dir("pool", debugfs_ns_parent);

	memset(pool_vars, 0, sizeof(pool_vars));

	ldlm_add_var(&pool_vars[0], pl->pl_debugfs_entry, "state", pl,
		     &lprocfs_pool_state_fops);

	pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT -
					   LDLM_POOL_FIRST_STAT, 0);
	if (!pl->pl_stats) {
		rc = -ENOMEM;
		goto out;
	}

	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "granted", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "grant", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "cancel", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "grant_rate", "locks/s");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "cancel_rate", "locks/s");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "grant_plan", "locks/s");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "slv", "slv");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "shrink_request", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "shrink_freed", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "recalc_freed", "locks");
	lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT,
			     LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
			     "recalc_timing", "sec");
	debugfs_create_file("stats", 0644, pl->pl_debugfs_entry, pl->pl_stats,
			    &ldebugfs_stats_seq_fops);

out:
	return rc;
}

static void ldlm_pool_sysfs_fini(struct ldlm_pool *pl)
{
	kobject_put(&pl->pl_kobj);
	wait_for_completion(&pl->pl_kobj_unregister);
}

static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl)
{
	if (pl->pl_stats) {
		lprocfs_free_stats(&pl->pl_stats);
		pl->pl_stats = NULL;
	}
	debugfs_remove_recursive(pl->pl_debugfs_entry);
}

int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
		   int idx, enum ldlm_side client)
{
	int rc;

	spin_lock_init(&pl->pl_lock);
	atomic_set(&pl->pl_granted, 0);
	pl->pl_recalc_time = ktime_get_seconds();
	atomic_set(&pl->pl_lock_volume_factor, 1 << 8);

	atomic_set(&pl->pl_grant_rate, 0);
	atomic_set(&pl->pl_cancel_rate, 0);
	pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L);

	snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d",
		 ldlm_ns_name(ns), idx);

	atomic_set(&pl->pl_limit, 1);
	pl->pl_server_lock_volume = 0;
	pl->pl_ops = &ldlm_cli_pool_ops;
	pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
	pl->pl_client_lock_volume = 0;
	rc = ldlm_pool_debugfs_init(pl);
	if (rc)
		return rc;

	rc = ldlm_pool_sysfs_init(pl);
	if (rc)
		return rc;

	CDEBUG(D_DLMTRACE, "Lock pool %s is initialized\n", pl->pl_name);

	return rc;
}

void ldlm_pool_fini(struct ldlm_pool *pl)
{
	ldlm_pool_sysfs_fini(pl);
	ldlm_pool_debugfs_fini(pl);

	/*
	 * The pool should not be used after this point. We can't free it here
	 * as it lives in struct ldlm_namespace, but we are still interested in
	 * catching any abnormal use.
	 */
	POISON(pl, 0x5a, sizeof(*pl));
}

/**
 * Add newly taken ldlm lock @lock to pool @pl accounting.
 */
void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
{
	/*
	 * FLOCK locks are special in the sense that they are almost never
	 * cancelled; instead a special kind of lock is used to drop them.
	 * There is also no LRU for flock locks, so there is no point in
	 * tracking them anyway.
	 */
	if (lock->l_resource->lr_type == LDLM_FLOCK)
		return;

	atomic_inc(&pl->pl_granted);
	atomic_inc(&pl->pl_grant_rate);
	lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
	/*
	 * Do not do pool recalc for the client side as all locks which may
	 * potentially be canceled have already been packed into the
	 * enqueue/cancel rpc. Also we do not want to run out of stack
	 * with too long call paths.
	 */
}

/**
 * Remove ldlm lock @lock from pool @pl accounting.
 */
void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
{
	/*
	 * Filter out FLOCK locks. Read the comment in ldlm_pool_add() above.
	 */
	if (lock->l_resource->lr_type == LDLM_FLOCK)
		return;

	LASSERT(atomic_read(&pl->pl_granted) > 0);
	atomic_dec(&pl->pl_granted);
	atomic_inc(&pl->pl_cancel_rate);

	lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
}

/**
 * Returns current @pl SLV.
 *
 * \pre ->pl_lock is not locked.
 */
u64 ldlm_pool_get_slv(struct ldlm_pool *pl)
{
	u64 slv;

	spin_lock(&pl->pl_lock);
	slv = pl->pl_server_lock_volume;
	spin_unlock(&pl->pl_lock);
	return slv;
}

/**
 * Sets passed @clv to @pl.
 *
 * \pre ->pl_lock is not locked.
 */
void ldlm_pool_set_clv(struct ldlm_pool *pl, u64 clv)
{
	spin_lock(&pl->pl_lock);
	pl->pl_client_lock_volume = clv;
	spin_unlock(&pl->pl_lock);
}

/**
 * Returns current LVF from @pl.
 */
u32 ldlm_pool_get_lvf(struct ldlm_pool *pl)
{
	return atomic_read(&pl->pl_lock_volume_factor);
}

static int ldlm_pool_granted(struct ldlm_pool *pl)
{
	return atomic_read(&pl->pl_granted);
}

/*
 * Count locks from all namespaces (if possible). Returns the number of
 * cached locks.
 */
static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask)
{
	unsigned long total = 0;
	int nr_ns;
	struct ldlm_namespace *ns;
	struct ldlm_namespace *ns_old = NULL; /* loop detection */

	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
		return 0;

	/*
	 * Find out how many resources we may release.
	 */
	for (nr_ns = ldlm_namespace_nr_read(client);
	     nr_ns > 0; nr_ns--) {
		mutex_lock(ldlm_namespace_lock(client));
		if (list_empty(ldlm_namespace_list(client))) {
			mutex_unlock(ldlm_namespace_lock(client));
			return 0;
		}
		ns = ldlm_namespace_first_locked(client);

		if (ns == ns_old) {
			mutex_unlock(ldlm_namespace_lock(client));
			break;
		}

		if (ldlm_ns_empty(ns)) {
			ldlm_namespace_move_to_inactive_locked(ns, client);
			mutex_unlock(ldlm_namespace_lock(client));
			continue;
		}

		if (!ns_old)
			ns_old = ns;

		ldlm_namespace_get(ns);
		ldlm_namespace_move_to_active_locked(ns, client);
		mutex_unlock(ldlm_namespace_lock(client));
		total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
		ldlm_namespace_put(ns);
	}

	return total;
}

static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr,
				     gfp_t gfp_mask)
{
	unsigned long freed = 0;
	int tmp, nr_ns;
	struct ldlm_namespace *ns;

	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
		return -1;

	/*
	 * Shrink at least ldlm_namespace_nr_read(client) namespaces.
	 */
	for (tmp = nr_ns = ldlm_namespace_nr_read(client);
	     tmp > 0; tmp--) {
		int cancel, nr_locks;

		/*
		 * Do not call shrink under ldlm_namespace_lock(client)
		 */
		mutex_lock(ldlm_namespace_lock(client));
		if (list_empty(ldlm_namespace_list(client))) {
			mutex_unlock(ldlm_namespace_lock(client));
			break;
		}
		ns = ldlm_namespace_first_locked(client);
		ldlm_namespace_get(ns);
		ldlm_namespace_move_to_active_locked(ns, client);
		mutex_unlock(ldlm_namespace_lock(client));

		nr_locks = ldlm_pool_granted(&ns->ns_pool);
		/*
		 * We used to shrink proportionally, but with the new shrinker
		 * API we lost the total number of freeable locks.
		 */
		cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
		freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
		ldlm_namespace_put(ns);
	}
	/*
	 * we only decrease the SLV in server pools shrinker, return
	 * SHRINK_STOP to kernel to avoid needless loop. LU-1128
	 */
	return freed;
}
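
/*
 * Example of the per-namespace quota above: if the shrinker asks to scan
 * nr = 128 objects and there are nr_ns = 4 client namespaces, each pass
 * requests cancel = 1 + min(granted, 128 / 4) locks, i.e. up to 33 locks per
 * namespace when plenty are granted, and never more than granted + 1.
 */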

static unsigned long ldlm_pools_cli_count(struct shrinker *s,
					  struct shrink_control *sc)
{
	return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
}

static unsigned long ldlm_pools_cli_scan(struct shrinker *s,
					 struct shrink_control *sc)
{
	return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
			       sc->gfp_mask);
}

static void ldlm_pools_recalc(struct work_struct *ws);
static DECLARE_DELAYED_WORK(ldlm_recalc_pools, ldlm_pools_recalc);

static void ldlm_pools_recalc(struct work_struct *ws)
{
	enum ldlm_side client = LDLM_NAMESPACE_CLIENT;
	struct ldlm_namespace *ns;
	struct ldlm_namespace *ns_old = NULL;
	/* recalc deadline to use if there are no active namespaces */
	time64_t delay = ktime_get_seconds() + LDLM_POOL_CLI_DEF_RECALC_PERIOD;
	int nr;

	/*
	 * Recalc at least ldlm_namespace_nr_read(client) namespaces.
	 */
	for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
		int skip;
		/*
		 * Lock the list, get first @ns in the list, getref, move it
		 * to the tail, unlock and call pool recalc. This way we avoid
		 * calling recalc under @ns lock, which is really good as we
		 * get rid of a potential deadlock on client nodes when
		 * canceling locks synchronously.
		 */
		mutex_lock(ldlm_namespace_lock(client));
		if (list_empty(ldlm_namespace_list(client))) {
			mutex_unlock(ldlm_namespace_lock(client));
			break;
		}
		ns = ldlm_namespace_first_locked(client);

		if (ns_old == ns) { /* Full pass complete */
			mutex_unlock(ldlm_namespace_lock(client));
			break;
		}

		/* We got an empty namespace, need to move it back to inactive
		 * list.
		 * The race with parallel resource creation is fine:
		 * - If they do namespace_get before our check, we fail the
		 *   check and they move this item to the end of the list anyway
		 * - If we do the check and then they do namespace_get, then
		 *   we move the namespace to inactive and they will move
		 *   it back to active (synchronised by the lock, so no clash
		 *   there).
		 */
		if (ldlm_ns_empty(ns)) {
			ldlm_namespace_move_to_inactive_locked(ns, client);
			mutex_unlock(ldlm_namespace_lock(client));
			continue;
		}

		if (!ns_old)
			ns_old = ns;

		spin_lock(&ns->ns_lock);
		/*
		 * skip ns which is being freed, and we don't want to increase
		 * its refcount again, not even temporarily. bz21519 & LU-499.
		 */
		if (ns->ns_stopping) {
			skip = 1;
		} else {
			skip = 0;
			ldlm_namespace_get(ns);
		}
		spin_unlock(&ns->ns_lock);

		ldlm_namespace_move_to_active_locked(ns, client);
		mutex_unlock(ldlm_namespace_lock(client));

		/*
		 * After setup is done - recalc the pool.
		 */
		if (!skip) {
			time64_t ttime = ldlm_pool_recalc(&ns->ns_pool);

			if (ttime < delay)
				delay = ttime;

			ldlm_namespace_put(ns);
		}
	}

	/* Wake up the blocking threads from time to time. */
	ldlm_bl_thread_wakeup();

	delay -= ktime_get_seconds();
	if (delay <= 0) {
		/* Prevent too frequent recalculation. */
		CDEBUG(D_DLMTRACE, "Negative interval(%lld)\n", delay);
		delay = 1;
	}

	schedule_delayed_work(&ldlm_recalc_pools, delay * HZ);
}

static int ldlm_pools_thread_start(void)
{
	schedule_delayed_work(&ldlm_recalc_pools, 0);

	return 0;
}

static void ldlm_pools_thread_stop(void)
{
	cancel_delayed_work_sync(&ldlm_recalc_pools);
}

static struct shrinker ldlm_pools_cli_shrinker = {
	.count_objects	= ldlm_pools_cli_count,
	.scan_objects	= ldlm_pools_cli_scan,
	.seeks		= DEFAULT_SEEKS,
};

int ldlm_pools_init(void)
{
	int rc;

	rc = ldlm_pools_thread_start();
	if (!rc)
		rc = register_shrinker(&ldlm_pools_cli_shrinker);

	return rc;
}

void ldlm_pools_fini(void)
{
	unregister_shrinker(&ldlm_pools_cli_shrinker);

	ldlm_pools_thread_stop();
}