/*******************************************************************************
    Copyright (c) 2018-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_ioctl.h"
#include "uvm_linux.h"
#include "uvm_lock.h"
#include "uvm_api.h"
#include "uvm_va_range.h"
#include "uvm_va_space.h"
#include "uvm_populate_pageable.h"

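// handle_mm_fault() has changed signature across kernel versions: older
// kernels take the mm_struct as the first argument, and newer kernels take a
// struct pt_regs pointer as the last argument. Pick the call form matching
// the running kernel based on the NV_HANDLE_MM_FAULT_* feature macros.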
#if defined(NV_HANDLE_MM_FAULT_HAS_MM_ARG)
#define UVM_HANDLE_MM_FAULT(vma, addr, flags)       handle_mm_fault(vma->vm_mm, vma, addr, flags)
#elif defined(NV_HANDLE_MM_FAULT_HAS_PT_REGS_ARG)
#define UVM_HANDLE_MM_FAULT(vma, addr, flags)       handle_mm_fault(vma, addr, flags, NULL)
#else
#define UVM_HANDLE_MM_FAULT(vma, addr, flags)       handle_mm_fault(vma, addr, flags)
#endif

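// Decide whether population should fault pages in for write access: inherit
// the vma's protection, never require write, or always require write.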
static bool is_write_populate(struct vm_area_struct *vma, uvm_populate_permissions_t populate_permissions)
{
    switch (populate_permissions) {
        case UVM_POPULATE_PERMISSIONS_INHERIT:
            return vma->vm_flags & VM_WRITE;
        case UVM_POPULATE_PERMISSIONS_ANY:
            return false;
        case UVM_POPULATE_PERMISSIONS_WRITE:
            return true;
        default:
            UVM_ASSERT(0);
            return false;
    }
}

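// Fault in each page of [start, start + vma_num_pages * PAGE_SIZE) by calling
// handle_mm_fault() directly, stopping at the first error and translating it
// to an NV_STATUS.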
NV_STATUS uvm_handle_fault(struct vm_area_struct *vma, unsigned long start, unsigned long vma_num_pages, bool write)
{
    NV_STATUS status = NV_OK;

    unsigned long i;
    unsigned int ret = 0;
    unsigned int fault_flags = write ? FAULT_FLAG_WRITE : 0;

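    // FAULT_FLAG_REMOTE (not present on older kernels) indicates that the
    // fault may target an mm other than current->mm, so per-task access
    // checks such as protection keys do not apply.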
#ifdef FAULT_FLAG_REMOTE
    fault_flags |= (FAULT_FLAG_REMOTE);
#endif

    for (i = 0; i < vma_num_pages; i++) {
        ret = UVM_HANDLE_MM_FAULT(vma, start + (i * PAGE_SIZE), fault_flags);
        if (ret & VM_FAULT_ERROR) {
#if defined(NV_VM_FAULT_TO_ERRNO_PRESENT)
            int err = vm_fault_to_errno(ret, fault_flags);
            status = errno_to_nv_status(err);
#else
            status = errno_to_nv_status(-EFAULT);
#endif
            break;
        }
    }

    return status;
}

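// Populate the pages of vma within [start, start + length), clamped to the
// vma's boundaries. min_prot is a VM_* protection mask the vma must satisfy.
// If touch is true, each populated page is pinned, read, and then unpinned.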
NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
                                    unsigned long start,
                                    unsigned long length,
                                    int min_prot,
                                    bool touch,
                                    uvm_populate_permissions_t populate_permissions)
{
    unsigned long vma_num_pages;
    unsigned long outer = start + length;
    unsigned int gup_flags = is_write_populate(vma, populate_permissions) ? FOLL_WRITE : 0;
    struct mm_struct *mm = vma->vm_mm;
    unsigned long vm_flags = vma->vm_flags;
    bool uvm_managed_vma;
    long ret;
    struct page **pages = NULL;
    NV_STATUS status = NV_OK;

    UVM_ASSERT(PAGE_ALIGNED(start));
    UVM_ASSERT(PAGE_ALIGNED(outer));
    UVM_ASSERT(vma->vm_end > start);
    UVM_ASSERT(vma->vm_start < outer);
    uvm_assert_mmap_lock_locked(mm);

    // On most CPU architectures, write permission implies read permission, so
    // treat a writable vma as readable too.
    if (vm_flags & VM_WRITE)
        vm_flags = vm_flags | VM_READ;

    if ((vm_flags & min_prot) != min_prot)
        return NV_ERR_INVALID_ADDRESS;

    // Clamp the input range to the vma boundaries
    start = max(start, vma->vm_start);
    outer = min(outer, vma->vm_end);

    vma_num_pages = (outer - start) / PAGE_SIZE;

    // See the comment in uvm_ats_service_fault() for details on the touch
    // parameter.
    if (touch) {
        pages = uvm_kvmalloc(vma_num_pages * sizeof(pages[0]));
        if (!pages)
            return NV_ERR_NO_MEMORY;
    }

    // If the input vma is managed by UVM, temporarily remove the record
    // associated with the locking of mmap_lock, in order to avoid a "locked
    // twice" validation error triggered when mmap_lock is also acquired in the
    // page fault handler. The page faults are triggered by the calls to
    // handle_mm_fault and get_user_pages below.
    uvm_managed_vma = uvm_file_is_nvidia_uvm(vma->vm_file);
    if (uvm_managed_vma)
        uvm_record_unlock_mmap_lock_read(mm);

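    // Fault the pages in with handle_mm_fault() first; any error here means
    // the range cannot be populated, so bail out before calling
    // get_user_pages.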
    status = uvm_handle_fault(vma, start, vma_num_pages, !!(gup_flags & FOLL_WRITE));
    if (status != NV_OK) {
        // Restore the lock record removed above before bailing out
        if (uvm_managed_vma)
            uvm_record_lock_mmap_lock_read(mm);
        goto out;
    }

    if (touch)
        ret = NV_PIN_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
    else
        ret = NV_GET_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);

    if (uvm_managed_vma)
        uvm_record_lock_mmap_lock_read(mm);

    if (ret < 0) {
        status = errno_to_nv_status(ret);
        goto out;
    }

    // Not all pages could be populated: unpin anything that was pinned and
    // return an error
    if (ret < vma_num_pages) {
        if (touch) {
            unsigned long i;

            for (i = 0; i < ret; i++) {
                UVM_ASSERT(pages[i]);
                NV_UNPIN_USER_PAGE(pages[i]);
            }
        }

        status = NV_ERR_NO_MEMORY;
        goto out;
    }

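    // All pages are now pinned: touch (read) each one, then drop the pin
    // taken by NV_PIN_USER_PAGES_REMOTE.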
    if (touch) {
        unsigned long i;

        for (i = 0; i < vma_num_pages; i++) {
            uvm_touch_page(pages[i]);
            NV_UNPIN_USER_PAGE(pages[i]);
        }
    }

out:
    uvm_kvfree(pages);
    return status;
}

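// Populate [start, start + length) by walking every vma intersecting the
// range. Fails with NV_ERR_INVALID_ADDRESS if any part of the range is not
// covered by a vma.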
NV_STATUS uvm_populate_pageable(struct mm_struct *mm,
                                const unsigned long start,
                                const unsigned long length,
                                int min_prot,
                                bool touch,
                                uvm_populate_permissions_t populate_permissions)
{
    struct vm_area_struct *vma;
    const unsigned long end = start + length;
    unsigned long prev_end = end;

    UVM_ASSERT(PAGE_ALIGNED(start));
    UVM_ASSERT(PAGE_ALIGNED(length));
    uvm_assert_mmap_lock_locked(mm);

    vma = find_vma_intersection(mm, start, end);
    if (!vma || (start < vma->vm_start))
        return NV_ERR_INVALID_ADDRESS;

    // VMAs are validated and populated one at a time, since they may have
    // different protection flags. Validation of VM_SPECIAL flags is delegated
    // to get_user_pages.
    for (; vma && vma->vm_start <= prev_end; vma = find_vma_intersection(mm, prev_end, end)) {
        NV_STATUS status = uvm_populate_pageable_vma(vma, start, end - start, min_prot, touch, populate_permissions);

        if (status != NV_OK)
            return status;

        if (vma->vm_end >= end)
            return NV_OK;

        prev_end = vma->vm_end;
    }

    // The input range is not fully covered by VMAs
    return NV_ERR_INVALID_ADDRESS;
}

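// ioctl entry point for UVM_POPULATE_PAGEABLE. Validates params->flags and
// the input range, then populates [params->base, params->base +
// params->length) on current->mm while holding mmap_lock for read. base and
// length must be page-aligned, which uvm_api_range_invalid enforces below.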
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp)
{
    NV_STATUS status;
    bool allow_managed;
    bool skip_prot_check;
    int min_prot;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->flags & ~UVM_POPULATE_PAGEABLE_FLAGS_ALL)
        return NV_ERR_INVALID_ARGUMENT;

    if ((params->flags & UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
        UVM_INFO_PRINT("Test flag set for UVM_POPULATE_PAGEABLE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Population of managed ranges is only allowed for test purposes. The
    // goal is to validate that it is possible to populate pageable ranges
    // backed by VMAs with the VM_MIXEDMAP or VM_DONTEXPAND special flags set.
    // Since there is no portable way to force allocation of such memory from
    // user space, and it is not safe to change the flags of an already-created
    // VMA from kernel space, we take advantage of the fact that managed ranges
    // have both special flags set at creation time (see uvm_mmap).
    allow_managed = params->flags & UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED;

    skip_prot_check = params->flags & UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK;
    if (skip_prot_check)
        min_prot = 0;
    else
        min_prot = VM_READ;

    // Check size, alignment and overflow. VMA validations are performed by
    // uvm_populate_pageable.
    if (uvm_api_range_invalid(params->base, params->length))
        return NV_ERR_INVALID_ADDRESS;

    // mmap_lock is needed to traverse the vmas in the input range and call
    // into get_user_pages. Unlike most UVM APIs, this one is defined to only
    // work on current->mm, not the mm associated with the VA space (if any).
    uvm_down_read_mmap_lock(current->mm);

    if (allow_managed || uvm_va_space_range_empty(va_space, params->base, params->base + params->length - 1)) {
        status = uvm_populate_pageable(current->mm,
                                       params->base,
                                       params->length,
                                       min_prot,
                                       false,
                                       UVM_POPULATE_PERMISSIONS_INHERIT);
    }
    else {
        status = NV_ERR_INVALID_ADDRESS;
    }

    uvm_up_read_mmap_lock(current->mm);

    return status;
}