1 /*-
2  * Copyright (c) 2010 Isilon Systems, Inc.
3  * Copyright (c) 2010 iX Systems, Inc.
4  * Copyright (c) 2010 Panasas, Inc.
5  * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
6  * Copyright (c) 2015 François Tigeot
7  * Copyright (c) 2015 Matthew Dillon <dillon@backplane.com>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice unmodified, this list of conditions, and the following
15  *    disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD$
32  */
33 #ifndef	_LINUXKPI_LINUX_MM_H_
34 #define	_LINUXKPI_LINUX_MM_H_
35 
36 #include <linux/spinlock.h>
37 #include <linux/gfp.h>
38 #include <linux/kernel.h>
39 #include <linux/mm_types.h>
40 #include <linux/pfn.h>
41 #include <linux/list.h>
42 #include <linux/mmap_lock.h>
43 #include <linux/shrinker.h>
44 #include <linux/page.h>
45 
46 #include <asm/pgtable.h>
47 
/* Align x to a PAGE_SIZE boundary using ALIGN(). */
#define	PAGE_ALIGN(x)		ALIGN(x, PAGE_SIZE)
49 
50 /*
51  * Make sure our LinuxKPI defined virtual memory flags don't conflict
52  * with the ones defined by FreeBSD:
53  */
54 CTASSERT((VM_PROT_ALL & -(1 << 8)) == 0);
55 
/* Protection flags map directly onto the FreeBSD VM_PROT_* values. */
#define	VM_READ			VM_PROT_READ
#define	VM_WRITE		VM_PROT_WRITE
#define	VM_EXEC			VM_PROT_EXECUTE

/* LinuxKPI-defined flags; these occupy bit 8 and above. */
#define	VM_PFNINTERNAL		(1 << 8)	/* FreeBSD private flag to vm_insert_pfn() */
#define	VM_MIXEDMAP		(1 << 9)
#define	VM_NORESERVE		(1 << 10)
#define	VM_PFNMAP		(1 << 11)
#define	VM_IO			(1 << 12)
#define	VM_MAYWRITE		(1 << 13)
#define	VM_DONTCOPY		(1 << 14)
#define	VM_DONTEXPAND		(1 << 15)
#define	VM_DONTDUMP		(1 << 16)
#define	VM_SHARED		(1 << 17)
70 
/* NOTE(review): consumers of this limit are outside this header. */
#define	VMA_MAX_PREFAULT_RECORD		1
72 
/* gup_flags bits accepted by the get_user_pages()/pin_user_pages() family. */
#define	FOLL_WRITE	(1 << 0)
#define	FOLL_FORCE	(1 << 1)
75 
/* Fault result bits. */
#define	VM_FAULT_OOM		(1 << 0)
#define	VM_FAULT_SIGBUS		(1 << 1)
#define	VM_FAULT_MAJOR		(1 << 2)
#define	VM_FAULT_WRITE		(1 << 3)
#define	VM_FAULT_HWPOISON	(1 << 4)
#define	VM_FAULT_HWPOISON_LARGE	(1 << 5)
#define	VM_FAULT_SIGSEGV	(1 << 6)
#define	VM_FAULT_NOPAGE		(1 << 7)
#define	VM_FAULT_LOCKED		(1 << 8)
#define	VM_FAULT_RETRY		(1 << 9)
#define	VM_FAULT_FALLBACK	(1 << 10)

/* Union of the result bits that are grouped together as errors. */
#define	VM_FAULT_ERROR							\
	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV |		\
	    VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE |		\
	    VM_FAULT_FALLBACK)
90 
/* Bits carried in the "flags" member of struct vm_fault. */
#define	FAULT_FLAG_WRITE	(1 << 0)
#define	FAULT_FLAG_MKWRITE	(1 << 1)
#define	FAULT_FLAG_ALLOW_RETRY	(1 << 2)
#define	FAULT_FLAG_RETRY_NOWAIT	(1 << 3)
#define	FAULT_FLAG_KILLABLE	(1 << 4)
#define	FAULT_FLAG_TRIED	(1 << 5)
#define	FAULT_FLAG_USER		(1 << 6)
#define	FAULT_FLAG_REMOTE	(1 << 7)
#define	FAULT_FLAG_INSTRUCTION	(1 << 8)

/*
 * True when FAULT_FLAG_ALLOW_RETRY is set and FAULT_FLAG_TRIED is
 * clear in "flags"; all other bits are ignored.
 */
#define	fault_flag_allow_retry_first(flags)				\
	(((flags) & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_TRIED)) ==	\
	    FAULT_FLAG_ALLOW_RETRY)
103 
104 typedef int (*pte_fn_t)(linux_pte_t *, unsigned long addr, void *data);
105 
106 struct vm_area_struct {
107 	vm_offset_t vm_start;
108 	vm_offset_t vm_end;
109 	vm_offset_t vm_pgoff;
110 	pgprot_t vm_page_prot;
111 	unsigned long vm_flags;
112 	struct mm_struct *vm_mm;
113 	void   *vm_private_data;
114 	const struct vm_operations_struct *vm_ops;
115 	struct linux_file *vm_file;
116 
117 	/* internal operation */
118 	vm_paddr_t vm_pfn;		/* PFN for memory map */
119 	vm_size_t vm_len;		/* length for memory map */
120 	vm_pindex_t vm_pfn_first;
121 	int	vm_pfn_count;
122 	int    *vm_pfn_pcount;
123 	vm_object_t vm_obj;
124 	vm_map_t vm_cached_map;
125 	TAILQ_ENTRY(vm_area_struct) vm_entry;
126 };
127 
128 struct vm_fault {
129 	unsigned int flags;
130 	pgoff_t	pgoff;
131 	union {
132 		/* user-space address */
133 		void *virtual_address;	/* < 4.11 */
134 		unsigned long address;	/* >= 4.11 */
135 	};
136 	struct page *page;
137 	struct vm_area_struct *vma;
138 };
139 
/* Per-VMA callback table referenced by vm_area_struct::vm_ops. */
struct vm_operations_struct {
	void	(*open)(struct vm_area_struct *);
	void	(*close)(struct vm_area_struct *);
	int	(*fault)(struct vm_fault *);
	int	(*access)(struct vm_area_struct *, unsigned long, void *,
		    int, int);
};
146 
/* Memory statistics record filled in by si_meminfo(). */
struct sysinfo {
	uint64_t	totalram;	/* Total usable main memory size */
	uint64_t	freeram;	/* Available memory size */
	uint64_t	totalhigh;	/* Total high memory size */
	uint64_t	freehigh;	/* Available high memory size */
	uint32_t	mem_unit;	/* Memory unit size in bytes */
};
154 
/*
 * Return the page backing address p.  This is a plain alias for
 * virt_to_page(); no separate head-page lookup is done here.
 */
static inline struct page *
virt_to_head_page(const void *p)
{
	struct page *pg;

	pg = virt_to_page(p);
	return (pg);
}
161 
162 /*
163  * Compute log2 of the power of two rounded up count of pages
164  * needed for size bytes.
165  */
166 static inline int
167 get_order(unsigned long size)
168 {
169 	int order;
170 
171 	size = (size - 1) >> PAGE_SHIFT;
172 	order = 0;
173 	while (size) {
174 		order++;
175 		size >>= 1;
176 	}
177 	return (order);
178 }
179 
/* Alias for page_address(). */
static inline void *
lowmem_page_address(struct page *page)
{
	void *va;

	va = page_address(page);
	return (va);
}
185 
186 /*
187  * This only works via memory map operations.
188  */
189 static inline int
190 io_remap_pfn_range(struct vm_area_struct *vma,
191     unsigned long addr, unsigned long pfn, unsigned long size,
192     vm_memattr_t prot)
193 {
194 	vma->vm_page_prot = prot;
195 	vma->vm_pfn = pfn;
196 	vma->vm_len = size;
197 
198 	return (0);
199 }
200 
201 vm_fault_t
202 lkpi_vmf_insert_pfn_prot_locked(struct vm_area_struct *vma, unsigned long addr,
203     unsigned long pfn, pgprot_t prot);
204 
205 static inline vm_fault_t
206 vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
207     unsigned long pfn, pgprot_t prot)
208 {
209 	vm_fault_t ret;
210 
211 	VM_OBJECT_WLOCK(vma->vm_obj);
212 	ret = lkpi_vmf_insert_pfn_prot_locked(vma, addr, pfn, prot);
213 	VM_OBJECT_WUNLOCK(vma->vm_obj);
214 
215 	return (ret);
216 }
217 #define	vmf_insert_pfn_prot(...)	\
218 	_Static_assert(false,		\
219 "This function is always called in a loop. Consider using the locked version")
220 
221 static inline int
222 apply_to_page_range(struct mm_struct *mm, unsigned long address,
223     unsigned long size, pte_fn_t fn, void *data)
224 {
225 	return (-ENOTSUP);
226 }
227 
/* Implemented outside this header. */
int	zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
	    unsigned long size);
230 
231 int lkpi_remap_pfn_range(struct vm_area_struct *vma,
232     unsigned long start_addr, unsigned long start_pfn, unsigned long size,
233     pgprot_t prot);
234 
235 static inline int
236 remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
237     unsigned long pfn, unsigned long size, pgprot_t prot)
238 {
239 	return (lkpi_remap_pfn_range(vma, addr, pfn, size, prot));
240 }
241 
242 static inline unsigned long
243 vma_pages(struct vm_area_struct *vma)
244 {
245 	return ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
246 }
247 
/* Byte offset of "off" within its page (the bits below PAGE_SIZE). */
#define	offset_in_page(off)						\
	((unsigned long)(off) & (PAGE_SIZE - 1))
249 
/* Linux set_page_dirty(), implemented with vm_page_dirty(). */
static inline void
set_page_dirty(struct vm_page *page)
{

	vm_page_dirty(page);
}
255 
/* Linux mark_page_accessed(), implemented with vm_page_reference(). */
static inline void
mark_page_accessed(struct vm_page *page)
{

	vm_page_reference(page);
}
261 
/*
 * Linux get_page(), implemented by wiring the page with vm_page_wire().
 * put_page() performs the matching unwire.
 */
static inline void
get_page(struct vm_page *page)
{

	vm_page_wire(page);
}
267 
extern long get_user_pages(unsigned long start, unsigned long nr_pages,
    unsigned int gup_flags, struct page **pages,
    struct vm_area_struct **vmas);

/*
 * pin_user_pages() is not distinguished from get_user_pages() here:
 * all arguments are forwarded and the result is returned unchanged.
 */
static inline long
pin_user_pages(unsigned long start, unsigned long nr_pages,
    unsigned int gup_flags, struct page **pages,
    struct vm_area_struct **vmas)
{
	long npinned;

	npinned = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
	return (npinned);
}
280 
281 extern int
282 __get_user_pages_fast(unsigned long start, int nr_pages, int write,
283     struct page **);
284 
285 static inline int
286 pin_user_pages_fast(unsigned long start, int nr_pages,
287     unsigned int gup_flags, struct page **pages)
288 {
289 	return __get_user_pages_fast(
290 	    start, nr_pages, !!(gup_flags & FOLL_WRITE), pages);
291 }
292 
extern long get_user_pages_remote(struct task_struct *task,
    struct mm_struct *mm, unsigned long start, unsigned long nr_pages,
    unsigned int gup_flags, struct page **pages,
    struct vm_area_struct **vmas);

/*
 * pin_user_pages_remote() is not distinguished from
 * get_user_pages_remote() here: all arguments are forwarded and the
 * result is returned unchanged.
 */
static inline long
pin_user_pages_remote(struct task_struct *task, struct mm_struct *mm,
    unsigned long start, unsigned long nr_pages,
    unsigned int gup_flags, struct page **pages,
    struct vm_area_struct **vmas)
{
	long npinned;

	npinned = get_user_pages_remote(task, mm, start, nr_pages,
	    gup_flags, pages, vmas);
	return (npinned);
}
308 
309 static inline void
310 put_page(struct vm_page *page)
311 {
312 	vm_page_unwire(page, PQ_ACTIVE);
313 }
314 
/* Unpinning is the same operation as dropping a page reference here. */
#define	unpin_user_page(page)		put_page(page)
#define	unpin_user_pages(pages, npages)	release_pages(pages, npages)

/* Note the argument order: pmap_copy_page() takes the source first. */
#define	copy_highpage(to, from)		pmap_copy_page(from, to)
319 
320 static inline pgprot_t
321 vm_get_page_prot(unsigned long vm_flags)
322 {
323 	return (vm_flags & VM_PROT_ALL);
324 }
325 
326 static inline vm_page_t
327 vmalloc_to_page(const void *addr)
328 {
329 	vm_paddr_t paddr;
330 
331 	paddr = pmap_kextract((vm_offset_t)addr);
332 	return (PHYS_TO_VM_PAGE(paddr));
333 }
334 
/* Linux trylock_page(); returns whatever vm_page_trylock() returns. */
static inline int
trylock_page(struct page *page)
{
	int locked;

	locked = vm_page_trylock(page);
	return (locked);
}
340 
/* Linux unlock_page(), implemented with vm_page_unlock(). */
static inline void
unlock_page(struct page *page)
{

	vm_page_unlock(page);
}
347 
/* Both are implemented outside this header. */
extern int	is_vmalloc_addr(const void *addr);
void		si_meminfo(struct sysinfo *si);
350 
351 static inline unsigned long
352 totalram_pages(void)
353 {
354 	return ((unsigned long)physmem);
355 }
356 
357 #define	unmap_mapping_range(...)	lkpi_unmap_mapping_range(__VA_ARGS__)
358 void lkpi_unmap_mapping_range(void *obj, loff_t const holebegin __unused,
359     loff_t const holelen, int even_cows __unused);
360 
/* Test p for PAGE_SIZE alignment using __is_aligned(). */
#define	PAGE_ALIGNED(p)		__is_aligned(p, PAGE_SIZE)
362 
/* Implemented outside this header. */
void	vma_set_file(struct vm_area_struct *vma, struct linux_file *file);
364 
365 static inline void
366 might_alloc(gfp_t gfp_mask __unused)
367 {
368 }
369 
/* Always evaluates to false; "flags" is not examined. */
#define	is_cow_mapping(flags)		(false)
371 
372 #endif					/* _LINUXKPI_LINUX_MM_H_ */
373