xref: /linux/arch/powerpc/platforms/pseries/vas.c (revision c6fbb759)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2020-21 IBM Corp.
4  */
5 
6 #define pr_fmt(fmt) "vas: " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/firmware.h>
20 #include <asm/vas.h>
21 #include "vas.h"
22 
23 #define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
24 #define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
25 /* The hypervisor allows one credit per window right now */
26 #define DEF_WIN_CREDS		1
27 
28 static struct vas_all_caps caps_all;
29 static bool copypaste_feat;
30 static struct hv_vas_cop_feat_caps hv_cop_caps;
31 
32 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
33 static DEFINE_MUTEX(vas_pseries_mutex);
34 static bool migration_in_progress;
35 
36 static long hcall_return_busy_check(long rc)
37 {
38 	/* Check if we are stalled for some time */
39 	if (H_IS_LONG_BUSY(rc)) {
40 		msleep(get_longbusy_msecs(rc));
41 		rc = H_BUSY;
42 	} else if (rc == H_BUSY) {
43 		cond_resched();
44 	}
45 
46 	return rc;
47 }
48 
49 /*
50  * Allocate VAS window hcall
51  */
52 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
53 				     u8 wintype, u16 credits)
54 {
55 	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
56 	long rc;
57 
58 	do {
59 		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
60 				  credits, domain[0], domain[1], domain[2],
61 				  domain[3], domain[4], domain[5]);
62 
63 		rc = hcall_return_busy_check(rc);
64 	} while (rc == H_BUSY);
65 
66 	if (rc == H_SUCCESS) {
67 		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
68 			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
69 			return -ENOTSUPP;
70 		}
71 		win->vas_win.winid = retbuf[0];
72 		win->win_addr = retbuf[1];
73 		win->complete_irq = retbuf[2];
74 		win->fault_irq = retbuf[3];
75 		return 0;
76 	}
77 
78 	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
79 		rc, wintype, credits);
80 
81 	return -EIO;
82 }
83 
84 /*
85  * Deallocate VAS window hcall.
86  */
87 static int h_deallocate_vas_window(u64 winid)
88 {
89 	long rc;
90 
91 	do {
92 		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
93 
94 		rc = hcall_return_busy_check(rc);
95 	} while (rc == H_BUSY);
96 
97 	if (rc == H_SUCCESS)
98 		return 0;
99 
100 	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
101 		rc, winid);
102 	return -EIO;
103 }
104 
105 /*
106  * Modify VAS window.
107  * After the window is opened with allocate window hcall, configure it
108  * with flags and LPAR PID before using.
109  */
110 static int h_modify_vas_window(struct pseries_vas_window *win)
111 {
112 	long rc;
113 
114 	/*
115 	 * AMR value is not supported in Linux VAS implementation.
116 	 * The hypervisor ignores it if 0 is passed.
117 	 */
118 	do {
119 		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
120 					win->vas_win.winid, win->pid, 0,
121 					VAS_MOD_WIN_FLAGS, 0);
122 
123 		rc = hcall_return_busy_check(rc);
124 	} while (rc == H_BUSY);
125 
126 	if (rc == H_SUCCESS)
127 		return 0;
128 
129 	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
130 			rc, win->vas_win.winid, win->pid);
131 	return -EIO;
132 }
133 
134 /*
135  * This hcall is used to determine the capabilities from the hypervisor.
136  * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
137  * @query_type: If 0 is passed, the hypervisor returns the overall
138  *		capabilities which provides all feature(s) that are
139  *		available. Then query the hypervisor to get the
140  *		corresponding capabilities for the specific feature.
141  *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
142  *			and VAS GZIP Default capabilities.
143  *			H_QUERY_NX_CAPABILITIES provides NX GZIP
144  *			capabilities.
145  * @result: Return buffer to save capabilities.
146  */
147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
148 {
149 	long rc;
150 
151 	rc = plpar_hcall_norets(hcall, query_type, result);
152 
153 	if (rc == H_SUCCESS)
154 		return 0;
155 
156 	/* H_FUNCTION means HV does not support VAS so don't print an error */
157 	if (rc != H_FUNCTION) {
158 		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
159 			(hcall == H_QUERY_VAS_CAPABILITIES) ?
160 				"H_QUERY_VAS_CAPABILITIES" :
161 				"H_QUERY_NX_CAPABILITIES",
162 			rc, query_type, result);
163 	}
164 
165 	return -EIO;
166 }
167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
168 
169 /*
170  * hcall to get fault CRB from the hypervisor.
171  */
172 static int h_get_nx_fault(u32 winid, u64 buffer)
173 {
174 	long rc;
175 
176 	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
177 
178 	if (rc == H_SUCCESS)
179 		return 0;
180 
181 	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
182 		rc, winid, buffer);
183 	return -EIO;
184 
185 }
186 
187 /*
188  * Handle the fault interrupt.
189  * When the fault interrupt is received for each window, query the
190  * hypervisor to get the fault CRB on the specific fault. Then
191  * process the CRB by updating CSB or send signal if the user space
192  * CSB is invalid.
193  * Note: The hypervisor forwards an interrupt for each fault request.
194  *	So one fault CRB to process for each H_GET_NX_FAULT hcall.
195  */
196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
197 {
198 	struct pseries_vas_window *txwin = data;
199 	struct coprocessor_request_block crb;
200 	struct vas_user_win_ref *tsk_ref;
201 	int rc;
202 
203 	rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
204 	if (!rc) {
205 		tsk_ref = &txwin->vas_win.task_ref;
206 		vas_dump_crb(&crb);
207 		vas_update_csb(&crb, tsk_ref);
208 	}
209 
210 	return IRQ_HANDLED;
211 }
212 
213 /*
214  * Allocate window and setup IRQ mapping.
215  */
/*
 * Allocate window and setup IRQ mapping.
 *
 * Opens a window of @wintype in the hypervisor (with the default single
 * credit) and then maps + requests the per-window fault IRQ. On any
 * failure the partial setup is unwound in reverse order and the window
 * is closed again in the hypervisor.
 *
 * Returns 0 on success or a negative errno.
 */
static int allocate_setup_window(struct pseries_vas_window *txwin,
				 u64 *domain, u8 wintype)
{
	int rc;

	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
	if (rc)
		return rc;
	/*
	 * On PowerVM, the hypervisor setup and forwards the fault
	 * interrupt per window. So the IRQ setup and fault handling
	 * will be done for each open window separately.
	 */
	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
	if (!txwin->fault_virq) {
		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
		rc = -EINVAL;
		goto out_win;
	}

	/* IRQ name (shown in /proc/interrupts), one per window */
	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
				txwin->vas_win.winid);
	if (!txwin->name) {
		rc = -ENOMEM;
		goto out_irq;
	}

	/* Faults are handled in thread context: the fault hcall can sleep */
	rc = request_threaded_irq(txwin->fault_virq, NULL,
				  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
				  txwin->name, txwin);
	if (rc) {
		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
		       txwin->vas_win.winid, txwin->fault_virq, rc);
		goto out_free;
	}

	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;

	return 0;
out_free:
	/* Unwind in reverse order of setup */
	kfree(txwin->name);
out_irq:
	irq_dispose_mapping(txwin->fault_virq);
out_win:
	h_deallocate_vas_window(txwin->vas_win.winid);
	return rc;
}
263 
/*
 * Free the per-window fault IRQ resources: the requested handler, the
 * IRQ name string and the virq mapping. Callers are responsible for
 * ordering this correctly against H_DEALLOCATE_VAS_WINDOW (the IRQ may
 * still fire until the window is closed in the hypervisor).
 */
static inline void free_irq_setup(struct pseries_vas_window *txwin)
{
	free_irq(txwin->fault_virq, txwin);
	kfree(txwin->name);
	irq_dispose_mapping(txwin->fault_virq);
}
270 
/*
 * Open a VAS send window for user space (.open_win callback).
 *
 * @vas_id: VAS instance to use, or -1 to let the hypervisor choose one
 *	    based on where the task is running.
 * @flags: VAS_TX_WIN_FLAG_QOS_CREDIT selects the QoS credit type,
 *	   otherwise default credits are used.
 * @cop_type: coprocessor type (unused here; windows are typed by the
 *	      credit/feature capabilities selected via @flags).
 *
 * Returns the embedded vas_window on success or ERR_PTR() on failure.
 */
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may be
	 *	over-committed on a system depends on whether the CPUs
	 *	are in shared or dedicated modes - that is, more requests
	 *	may be issued across the system than NX can service at
	 *	once which can result in paste command failure (RMA_busy).
	 *	Then the process has to resend requests or fall-back to
	 *	SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 *	Refer PAPR for more information.
	 *
	 * Allocate window with QoS credits if user requested. Otherwise
	 * default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	/*
	 * Reserve a credit up front; released on the error path below.
	 * One credit == one window on pseries.
	 */
	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
			atomic_read(&cop_feat_caps->nr_total_credits)) {
		pr_err("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * a VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select VAS instance. Useful if the process is
		 * affinity to NUMA node.
		 * The hypervisor selects VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take a
		 * domain values as specified by h_home_node_associativity,
		 * So no unpacking needs to be done.
		 */
		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				  VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (rc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
			goto out;
		}
	}

	txwin->pid = mfspr(SPRN_PID);

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So expects OS
	 *	to handle NX faults, means IRQ can be freed only
	 *	after the deallocate window hcall is returned.
	 * So once the window is closed with deallocate hcall before
	 * the IRQ is freed, it can be assigned to new allocate
	 * hcall with the same fault IRQ by the hypervisor. It can
	 * result in setup IRQ fail for the new window since the
	 * same fault IRQ is not freed by the OS before.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (migration_in_progress)
		rc = -EBUSY;
	else
		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
				   cop_feat_caps->win_type);
	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	txwin->win_type = cop_feat_caps->win_type;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * Possible to lose the acquired credit with DLPAR core
	 * removal after the window is opened. So if there are any
	 * closed windows (means with lost credits), do not give new
	 * window to user space. New windows will be opened only
	 * after the existing windows are reopened when credits are
	 * available.
	 */
	if (!caps->nr_close_wins) {
		list_add(&txwin->win_list, &caps->list);
		caps->nr_open_windows++;
		mutex_unlock(&vas_pseries_mutex);
		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
		return &txwin->vas_win;
	}
	mutex_unlock(&vas_pseries_mutex);

	/* Credits were lost meanwhile - undo and report busy */
	put_vas_user_win_ref(&txwin->vas_win.task_ref);
	rc = -EBUSY;
	pr_err("No credit is available to allocate window\n");

out_free:
	/*
	 * Window is not operational. Free IRQ before closing
	 * window so that do not have to hold mutex.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
out:
	atomic_dec(&cop_feat_caps->nr_used_credits);
	kfree(txwin);
	return ERR_PTR(rc);
}
414 
415 static u64 vas_paste_address(struct vas_window *vwin)
416 {
417 	struct pseries_vas_window *win;
418 
419 	win = container_of(vwin, struct pseries_vas_window, vas_win);
420 	return win->win_addr;
421 }
422 
423 static int deallocate_free_window(struct pseries_vas_window *win)
424 {
425 	int rc = 0;
426 
427 	/*
428 	 * The hypervisor waits for all requests including faults
429 	 * are processed before closing the window - Means all
430 	 * credits have to be returned. In the case of fault
431 	 * request, a credit is returned after OS issues
432 	 * H_GET_NX_FAULT hcall.
433 	 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
434 	 * hcall.
435 	 */
436 	rc = h_deallocate_vas_window(win->vas_win.winid);
437 	if (!rc)
438 		free_irq_setup(win);
439 
440 	return rc;
441 }
442 
/*
 * Close a user space window (.close_win callback): counterpart of
 * vas_allocate_window(). Closes the window in the hypervisor (unless
 * it was already closed by DLPAR credit loss or migration), removes it
 * from the per-type list, releases its credit and task references, and
 * frees the window struct.
 *
 * Returns 0 on success or a negative errno.
 */
static int vas_deallocate_window(struct vas_window *vwin)
{
	struct pseries_vas_window *win;
	struct vas_cop_feat_caps *caps;
	int rc = 0;

	if (!vwin)
		return -EINVAL;

	win = container_of(vwin, struct pseries_vas_window, vas_win);

	/* Should not happen */
	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Window (%u): Invalid window type %u\n",
				vwin->winid, win->win_type);
		return -EINVAL;
	}

	caps = &vascaps[win->win_type].caps;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * VAS window is already closed in the hypervisor when
	 * lost the credit or with migration. So just remove the entry
	 * from the list, remove task references and free vas_window
	 * struct.
	 */
	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
		rc = deallocate_free_window(win);
		if (rc) {
			mutex_unlock(&vas_pseries_mutex);
			return rc;
		}
	} else
		vascaps[win->win_type].nr_close_wins--;

	list_del(&win->win_list);
	atomic_dec(&caps->nr_used_credits);
	vascaps[win->win_type].nr_open_windows--;
	mutex_unlock(&vas_pseries_mutex);

	put_vas_user_win_ref(&vwin->task_ref);
	mm_context_remove_vas_window(vwin->task_ref.mm);

	kfree(win);
	return 0;
}
490 
/* User-window callbacks wired into the common VAS API layer */
static const struct vas_user_win_ops vops_pseries = {
	.open_win	= vas_allocate_window,	/* Open and configure window */
	.paste_addr	= vas_paste_address,	/* To do copy/paste */
	.close_win	= vas_deallocate_window, /* Close window */
};
496 
497 /*
498  * Supporting only nx-gzip coprocessor type now, but this API code
499  * extended to other coprocessor types later.
500  */
501 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
502 			     const char *name)
503 {
504 	if (!copypaste_feat)
505 		return -ENOTSUPP;
506 
507 	return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
508 }
509 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
510 
/* Tear down the coprocessor API registration done in register above. */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
516 
517 /*
518  * Get the specific capabilities based on the feature type.
519  * Right now supports GZIP default and GZIP QoS capabilities.
520  */
/*
 * @feat: feature identifier passed to the capabilities hcall.
 * @type: index into the file-scope vascaps[] array to populate.
 * @hv_caps: scratch buffer (big-endian, filled by the hypervisor).
 *
 * Populates vascaps[type] from the hypervisor's reply, registers the
 * sysfs entries, and enables the file-scope copypaste_feat flag on
 * success. Returns 0 or a negative errno.
 */
static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
				struct hv_vas_cop_feat_caps *hv_caps)
{
	struct vas_cop_feat_caps *caps;
	struct vas_caps *vcaps;
	int rc = 0;

	vcaps = &vascaps[type];
	memset(vcaps, 0, sizeof(*vcaps));
	INIT_LIST_HEAD(&vcaps->list);

	vcaps->feat = feat;
	caps = &vcaps->caps;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
					  (u64)virt_to_phys(hv_caps));
	if (rc)
		return rc;

	caps->user_mode = hv_caps->user_mode;
	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
		pr_err("User space COPY/PASTE is not supported\n");
		return -ENOTSUPP;
	}

	/* Hypervisor reply fields are big-endian; convert on the way in */
	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
	caps->win_type = hv_caps->win_type;
	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Unsupported window type %u\n", caps->win_type);
		return -EINVAL;
	}
	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
	atomic_set(&caps->nr_total_credits,
		   be16_to_cpu(hv_caps->target_lpar_creds));
	if (feat == VAS_GZIP_DEF_FEAT) {
		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);

		/* Each window needs at least DEF_WIN_CREDS credits */
		if (caps->max_win_creds < DEF_WIN_CREDS) {
			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
			       DEF_WIN_CREDS, caps->max_win_creds);
			return -EINVAL;
		}
	}

	rc = sysfs_add_vas_caps(caps);
	if (rc)
		return rc;

	copypaste_feat = true;

	return 0;
}
574 
575 /*
576  * VAS windows can be closed due to lost credits when the core is
577  * removed. So reopen them if credits are available due to DLPAR
578  * core add and set the window active status. When NX sees the page
579  * fault on the unmapped paste address, the kernel handles the fault
580  * by setting the remapping to new paste address if the window is
581  * active.
582  */
/*
 * Reopen windows that were closed earlier due to DLPAR credit loss
 * (or, with @migrate, during partition suspend).
 *
 * @vcaps: per-feature capabilities (holds the window list).
 * @creds: number of newly available credits (DLPAR case).
 * @migrate: true on partition resume - reopen every closed window.
 *
 * Returns 0 on success or a negative errno from the allocate/modify
 * hcalls.
 */
static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
				 bool migrate)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *caps = &vcaps->caps;
	struct pseries_vas_window *win = NULL, *tmp;
	int rc, mv_ents = 0;
	int flag;

	/*
	 * Nothing to do if there are no closed windows.
	 */
	if (!vcaps->nr_close_wins)
		return 0;

	/*
	 * For the core removal, the hypervisor reduces the credits
	 * assigned to the LPAR and the kernel closes VAS windows
	 * in the hypervisor depends on reduced credits. The kernel
	 * uses LIFO (the last windows that are opened will be closed
	 * first) and expects to open in the same order when credits
	 * are available.
	 * For example, 40 windows are closed when the LPAR lost 2 cores
	 * (dedicated). If 1 core is added, this LPAR can have 20 more
	 * credits. It means the kernel can reopen 20 windows. So move
	 * 20 entries in the VAS windows lost and reopen next 20 windows.
	 * For partition migration, reopen all windows that are closed
	 * during resume.
	 */
	if ((vcaps->nr_close_wins > creds) && !migrate)
		mv_ents = vcaps->nr_close_wins - creds;

	/*
	 * Advance the list cursor past the first mv_ents entries;
	 * the loop below (list_for_each_entry_safe_from) continues
	 * from the window 'win' is left pointing at.
	 */
	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
		if (!mv_ents)
			break;

		mv_ents--;
	}

	/*
	 * Open windows if they are closed only with migration or
	 * DLPAR (lost credit) before.
	 */
	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
		/*
		 * This window is closed with DLPAR and migration events.
		 * So reopen the window with the last event.
		 * The user space is not suspended with the current
		 * migration notifier. So the user space can issue DLPAR
		 * CPU hotplug while migration in progress. In this case
		 * this window will be opened with the last event.
		 */
		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
			win->vas_win.status &= ~flag;
			continue;
		}

		/*
		 * Nothing to do on this window if it is not closed
		 * with this flag
		 */
		if (!(win->vas_win.status & flag))
			continue;

		rc = allocate_setup_window(win, (u64 *)&domain[0],
					   caps->win_type);
		if (rc)
			return rc;

		rc = h_modify_vas_window(win);
		if (rc)
			goto out;

		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
		/*
		 * Set window status to active
		 */
		win->vas_win.status &= ~flag;
		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
		win->win_type = caps->win_type;
		if (!--vcaps->nr_close_wins)
			break;
	}

	return 0;
out:
	/*
	 * Window modify HCALL failed. So close the window to the
	 * hypervisor and return.
	 */
	free_irq_setup(win);
	h_deallocate_vas_window(win->vas_win.winid);
	return rc;
}
683 
684 /*
685  * The hypervisor reduces the available credits if the LPAR lost core. It
686  * means the excessive windows should not be active and the user space
687  * should not be using these windows to send compression requests to NX.
688  * So the kernel closes the excessive windows and unmap the paste address
689  * such that the user space receives paste instruction failure. Then up to
690  * the user space to fall back to SW compression and manage with the
691  * existing windows.
692  */
/*
 * Close active windows in the hypervisor when credits are reduced
 * (DLPAR core removal) or the partition is being suspended for
 * migration. The paste mapping is zapped so user space sees paste
 * failures and can fall back to software compression; the window
 * struct itself is kept so it can be reopened or closed via close(FD)
 * later.
 *
 * @vcap: per-feature capabilities (holds the window list).
 * @excess_creds: number of windows to close (ignored when @migrate).
 * @migrate: true during suspend - close every active window.
 */
static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
									bool migrate)
{
	struct pseries_vas_window *win, *tmp;
	struct vas_user_win_ref *task_ref;
	struct vm_area_struct *vma;
	int rc = 0, flag;

	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
		/*
		 * This window is already closed due to lost credit
		 * or for migration before. Go for next window.
		 * For migration, nothing to do since this window
		 * closed for DLPAR and will be reopened even on
		 * the destination system with other DLPAR operation.
		 */
		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
			win->vas_win.status |= flag;
			continue;
		}

		task_ref = &win->vas_win.task_ref;
		mutex_lock(&task_ref->mmap_mutex);
		vma = task_ref->vma;
		/*
		 * Number of available credits are reduced, So select
		 * and close windows.
		 */
		win->vas_win.status |= flag;

		mmap_write_lock(task_ref->mm);
		/*
		 * vma is set in the original mapping. But this mapping
		 * is done with mmap() after the window is opened with ioctl.
		 * so we may not see the original mapping if the core remove
		 * is done before the original mmap() and after the ioctl.
		 */
		if (vma)
			zap_page_range(vma, vma->vm_start,
					vma->vm_end - vma->vm_start);

		mmap_write_unlock(task_ref->mm);
		mutex_unlock(&task_ref->mmap_mutex);
		/*
		 * Close VAS window in the hypervisor, but do not
		 * free vas_window struct since it may be reused
		 * when the credit is available later (DLPAR with
		 * adding cores). This struct will be used
		 * later when the process issued with close(FD).
		 */
		rc = deallocate_free_window(win);
		/*
		 * This failure is from the hypervisor.
		 * No way to stop migration for these failures.
		 * So ignore error and continue closing other windows.
		 */
		if (rc && !migrate)
			return rc;

		vcap->nr_close_wins++;

		/*
		 * For migration, do not depend on lpar_creds in case if
		 * mismatch with the hypervisor value (should not happen).
		 * So close all active windows in the list and will be
		 * reopened windows based on the new lpar_creds on the
		 * destination system during resume.
		 */
		if (!migrate && !--excess_creds)
			break;
	}

	return 0;
}
773 
774 /*
775  * Get new VAS capabilities when the core add/removal configuration
776  * changes. Reconfig window configurations based on the credits
777  * availability from this new capabilities.
778  */
/*
 * @type: feature type index into vascaps[].
 * @new_nr_creds: new total LPAR credits reported by the hypervisor.
 *
 * NOTE(review): the function name misspells "capabilities"; kept as-is
 * since it is part of the interface used outside this file.
 */
int vas_reconfig_capabilties(u8 type, int new_nr_creds)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds;
	struct vas_caps *vcaps;
	int rc = 0, nr_active_wins;

	if (type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Invalid credit type %d\n", type);
		return -EINVAL;
	}

	vcaps = &vascaps[type];
	caps = &vcaps->caps;

	mutex_lock(&vas_pseries_mutex);

	old_nr_creds = atomic_read(&caps->nr_total_credits);

	atomic_set(&caps->nr_total_credits, new_nr_creds);
	/*
	 * The total number of available credits may be decreased or
	 * increased with DLPAR operation. Means some windows have to be
	 * closed / reopened. Hold the vas_pseries_mutex so that the
	 * user space can not open new windows.
	 */
	if (old_nr_creds <  new_nr_creds) {
		/*
		 * If the existing target credits is less than the new
		 * target, reopen windows if they are closed due to
		 * the previous DLPAR (core removal).
		 */
		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
					   false);
	} else {
		/*
		 * # active windows is more than new LPAR available
		 * credits. So close the excessive windows.
		 * On pseries, each window will have 1 credit.
		 */
		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
		if (nr_active_wins > new_nr_creds)
			rc = reconfig_close_windows(vcaps,
					nr_active_wins - new_nr_creds,
					false);
	}

	mutex_unlock(&vas_pseries_mutex);
	return rc;
}
829 /*
830  * Total number of default credits available (target_credits)
831  * in LPAR depends on number of cores configured. It varies based on
832  * whether processors are in shared mode or dedicated mode.
833  * Get the notifier when CPU configuration is changed with DLPAR
834  * operation so that get the new target_credits (vas default capabilities)
835  * and then update the existing windows usage if needed.
836  */
/*
 * OF reconfig notifier: when CPUs are added/removed via DLPAR, re-query
 * the default GZIP capabilities and reconcile open/closed windows with
 * the new credit total.
 *
 * NOTE(review): on failure this returns a negative errno rather than a
 * NOTIFY_* value (e.g. notifier_from_errno(rc)) - confirm the
 * of_reconfig notifier chain tolerates that return convention.
 */
static int pseries_vas_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *dn = rd->dn;
	const __be32 *intserv = NULL;
	int new_nr_creds, len, rc = 0;

	/* Only CPU nodes carry this property; others are ignored below */
	if ((action == OF_RECONFIG_ATTACH_NODE) ||
		(action == OF_RECONFIG_DETACH_NODE))
		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
					  &len);
	/*
	 * Processor config is not changed
	 */
	if (!intserv)
		return NOTIFY_OK;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
					vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
					(u64)virt_to_phys(&hv_cop_caps));
	if (!rc) {
		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
						new_nr_creds);
	}

	if (rc)
		pr_err("Failed reconfig VAS capabilities with DLPAR\n");

	return rc;
}
869 
/* Registered on the OF reconfig chain in pseries_vas_init() */
static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};
873 
874 /*
875  * For LPM, all windows have to be closed on the source partition
876  * before migration and reopen them on the destination partition
877  * after migration. So closing windows during suspend and
878  * reopen them during resume.
879  */
/*
 * @action: VAS_SUSPEND (close all windows before migration) or
 *	    VAS_RESUME (reopen them on the destination partition).
 *
 * Returns 0 or a negative errno; errors are deliberately ignored
 * during suspend since migration cannot be stopped at this point.
 */
int vas_migration_handler(int action)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds, new_nr_creds = 0;
	struct vas_caps *vcaps;
	int i, rc = 0;

	/*
	 * NX-GZIP is not enabled. Nothing to do for migration.
	 */
	if (!copypaste_feat)
		return rc;

	mutex_lock(&vas_pseries_mutex);

	/* Blocks new window opens (see vas_allocate_window()) */
	if (action == VAS_SUSPEND)
		migration_in_progress = true;
	else
		migration_in_progress = false;

	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
		vcaps = &vascaps[i];
		caps = &vcaps->caps;
		old_nr_creds = atomic_read(&caps->nr_total_credits);

		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
					      vcaps->feat,
					      (u64)virt_to_phys(&hv_cop_caps));
		if (!rc) {
			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
			/*
			 * Should not happen. But incase print messages, close
			 * all windows in the list during suspend and reopen
			 * windows based on new lpar_creds on the destination
			 * system.
			 */
			if (old_nr_creds != new_nr_creds) {
				pr_err("Target credits mismatch with the hypervisor\n");
				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
					action, old_nr_creds, new_nr_creds);
				pr_err("Used creds: %d, Active creds: %d\n",
					atomic_read(&caps->nr_used_credits),
					vcaps->nr_open_windows - vcaps->nr_close_wins);
			}
		} else {
			pr_err("state(%d): Get VAS capabilities failed with %d\n",
				action, rc);
			/*
			 * We can not stop migration with the current lpm
			 * implementation. So continue closing all windows in
			 * the list (during suspend) and return without
			 * opening windows (during resume) if VAS capabilities
			 * HCALL failed.
			 */
			if (action == VAS_RESUME)
				goto out;
		}

		switch (action) {
		case VAS_SUSPEND:
			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
							true);
			break;
		case VAS_RESUME:
			atomic_set(&caps->nr_total_credits, new_nr_creds);
			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
			break;
		default:
			/* should not happen */
			pr_err("Invalid migration action %d\n", action);
			rc = -EINVAL;
			goto out;
		}

		/*
		 * Ignore errors during suspend and return for resume.
		 */
		if (rc && (action == VAS_RESUME))
			goto out;
	}

out:
	mutex_unlock(&vas_pseries_mutex);
	return rc;
}
965 
/*
 * Module init: query the overall VAS capabilities from the hypervisor,
 * then pull in the QoS and default GZIP feature capabilities that are
 * advertised. Registers the DLPAR notifier once a usable copy/paste
 * feature is confirmed.
 */
static int __init pseries_vas_init(void)
{
	struct hv_vas_all_caps *hv_caps;
	int rc = 0;

	/*
	 * Linux supports user space COPY/PASTE only with Radix
	 */
	if (!radix_enabled()) {
		pr_err("API is supported only with radix page tables\n");
		return -ENOTSUPP;
	}

	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
	if (!hv_caps)
		return -ENOMEM;
	/*
	 * Get VAS overall capabilities by passing 0 to feature type.
	 */
	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
					  (u64)virt_to_phys(hv_caps));
	if (rc)
		goto out;

	/* Hypervisor reply is big-endian */
	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);

	sysfs_pseries_vas_init(&caps_all);

	/*
	 * QOS capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);

		if (rc)
			goto out;
	}
	/*
	 * Default capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);

	if (!rc && copypaste_feat) {
		/* Watch for DLPAR CPU changes that alter credit totals */
		if (firmware_has_feature(FW_FEATURE_LPAR))
			of_reconfig_notifier_register(&pseries_vas_nb);

		pr_info("GZIP feature is available\n");
	} else {
		/*
		 * Should not happen, but only when get default
		 * capabilities HCALL failed. So disable copy paste
		 * feature.
		 */
		copypaste_feat = false;
	}

out:
	kfree(hv_caps);
	return rc;
}
machine_device_initcall(pseries, pseries_vas_init);
1031