/*******************************************************************************
    Copyright (c) 2018 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_MIGRATE_PAGEABLE_H__
#define __UVM_MIGRATE_PAGEABLE_H__

#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_populate_pageable.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"

typedef struct
{
    uvm_va_space_t                  *va_space;
    struct mm_struct                *mm;
    unsigned long                   start;
    unsigned long                   length;
    uvm_processor_id_t              dst_id;

    // dst_node_id may be clobbered by uvm_migrate_pageable().
    int                             dst_node_id;
    uvm_populate_permissions_t      populate_permissions;
    bool                            touch : 1;
    bool                            skip_mapped : 1;
    NvU64                           *user_space_start;
    NvU64                           *user_space_length;
} uvm_migrate_args_t;

#if defined(CONFIG_MIGRATE_VMA_HELPER)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#elif defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#endif

#ifdef UVM_MIGRATE_VMA_SUPPORTED
#include <linux/migrate.h>

// Calls to migrate_vma are capped at 512 pages to put an upper bound on the
// amount of metadata that has to be allocated for the operation. This number
// was chosen because performance seems to plateau at this size on kernels with
// 64K pages. On kernels with PAGE_SIZE == 4K, 512 pages correspond to a 2M VA
// block, which is also the standard size for batched operations.
#define UVM_MIGRATE_VMA_MAX_PAGES (512UL)
#define UVM_MIGRATE_VMA_MAX_SIZE (UVM_MIGRATE_VMA_MAX_PAGES * PAGE_SIZE)
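
// Illustrative sketch (not part of this header): a caller that needs to migrate
// a larger range would walk it in UVM_MIGRATE_VMA_MAX_SIZE chunks so that each
// migrate_vma invocation stays within the cap above. migrate_chunk() is a
// hypothetical helper standing in for the per-chunk migrate_vma work.
//
//     static NV_STATUS migrate_range_in_chunks(uvm_migrate_args_t *uvm_migrate_args)
//     {
//         unsigned long addr = uvm_migrate_args->start;
//         unsigned long range_end = uvm_migrate_args->start + uvm_migrate_args->length;
//
//         while (addr < range_end) {
//             // Each chunk covers at most UVM_MIGRATE_VMA_MAX_PAGES pages
//             unsigned long chunk_end = min(addr + UVM_MIGRATE_VMA_MAX_SIZE, range_end);
//             NV_STATUS status = migrate_chunk(uvm_migrate_args, addr, chunk_end);
//
//             if (status != NV_OK)
//                 return status;
//
//             addr = chunk_end;
//         }
//
//         return NV_OK;
//     }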

typedef struct
{
    // Input parameters
    uvm_migrate_args_t  *uvm_migrate_args;

    // Output parameters
    //
    // Error code. This only signals errors in internal UVM operations.
    // Pages that failed allocation or could not be populated are communicated
    // using the fields below.
    NV_STATUS           status;

    // Mask of pages that couldn't be made resident on the destination because
    // (a) they are backed with data but the pages are not populated (e.g. they
    // are in swap);
    // (b) they are not backed with any data yet and were not populated because
    // the vma is not READ_WRITE, as populating them would not charge the pages
    // to the process properly;
    // (c) they are already mapped and skip_mapped requested that such pages not
    // be migrated; or
    // (d) the kernel could not migrate them.
    DECLARE_BITMAP(populate_pages_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Mask of pages that failed allocation on the destination
    DECLARE_BITMAP(allocation_failed_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Mask of pages which are already resident at the destination
    DECLARE_BITMAP(dst_resident_pages_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Global state managed by the caller
    //
    // These are scratch masks that the migrate_vma caller can use to save
    // output page masks and to orchestrate the migrate_vma retries/population
    // calls if needed (see the sketch after this struct).
    DECLARE_BITMAP(scratch1_mask, UVM_MIGRATE_VMA_MAX_PAGES);
    DECLARE_BITMAP(scratch2_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Arrays used by migrate_vma to store the src/dst pfns
    unsigned long dst_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];
    unsigned long src_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];

    // Internal state
    //
    uvm_tracker_t tracker;

    struct {
        // Array of page IOMMU mappings created during allocate_and_copy.
        // Required when using the SYS aperture. They are freed in
        // finalize_and_map. Also keep an array with the GPU for which each
        // mapping was created.
        NvU64         addrs[UVM_MIGRATE_VMA_MAX_PAGES];
        uvm_gpu_t    *addrs_gpus[UVM_MIGRATE_VMA_MAX_PAGES];

        // Mask of pages with entries in the dma address arrays above
        DECLARE_BITMAP(page_mask, UVM_MIGRATE_VMA_MAX_PAGES);

        // Number of pages for which IOMMU mappings were created
        unsigned long num_pages;
    } dma;

    // Processors where pages are resident before calling migrate_vma
    uvm_processor_mask_t src_processors;

    // Array of per-processor page masks with the pages that are resident
    // before calling migrate_vma.
    struct {
        DECLARE_BITMAP(page_mask, UVM_MIGRATE_VMA_MAX_PAGES);
    } processors[UVM_ID_MAX_PROCESSORS];

    // Number of pages in the migrate_vma call
    unsigned long num_pages;

    // Number of pages that are directly populated on the destination
    unsigned long num_populate_anon_pages;
} migrate_vma_state_t;
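
// Illustrative sketch (not part of this header): after a migrate_vma pass, the
// caller can fold the output masks into one of the scratch masks to decide
// whether a plain CPU population pass is still needed. The helper below is
// hypothetical and only demonstrates the intended use of the scratch masks.
//
//     static bool chunk_needs_populate_pass(migrate_vma_state_t *state)
//     {
//         // Pages that could not be migrated plus pages whose destination
//         // allocation failed are candidates for a get_user_pages() fallback.
//         bitmap_or(state->scratch1_mask,
//                   state->populate_pages_mask,
//                   state->allocation_failed_mask,
//                   state->num_pages);
//
//         return !bitmap_empty(state->scratch1_mask, state->num_pages);
//     }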

#if defined(CONFIG_MIGRATE_VMA_HELPER)
struct migrate_vma {
    struct vm_area_struct   *vma;
    unsigned long           *dst;
    unsigned long           *src;
    unsigned long           start;
    unsigned long           end;
};

void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
                                           const unsigned long *src,
                                           unsigned long *dst,
                                           unsigned long start,
                                           unsigned long end,
                                           void *private);

void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
                                             const unsigned long *src,
                                             const unsigned long *dst,
                                             unsigned long start,
                                             unsigned long end,
                                             void *private);
#else
void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state);
void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state);
#endif // CONFIG_MIGRATE_VMA_HELPER
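
// Illustrative sketch (not part of this header), assuming the newer kernel
// migrate_vma_setup()/migrate_vma_pages()/migrate_vma_finalize() interface: the
// non-helper callbacks above would typically be driven per chunk roughly as
// shown below. Depending on the kernel version, struct migrate_vma may require
// additional fields (e.g. flags) to be set; migrate_chunk() is hypothetical.
//
//     static NV_STATUS migrate_chunk(struct vm_area_struct *vma,
//                                    unsigned long start,
//                                    unsigned long end,
//                                    migrate_vma_state_t *state)
//     {
//         struct migrate_vma args = {
//             .vma   = vma,
//             .src   = state->src_pfn_array,
//             .dst   = state->dst_pfn_array,
//             .start = start,
//             .end   = end,
//         };
//
//         if (migrate_vma_setup(&args) != 0)
//             return NV_ERR_INVALID_ADDRESS;
//
//         // Allocate destination pages and stage the copies
//         uvm_migrate_vma_alloc_and_copy(&args, state);
//
//         if (state->status == NV_OK) {
//             // Flip the page tables over to the new pages, then record which
//             // pages actually moved and release any DMA mappings.
//             migrate_vma_pages(&args);
//             uvm_migrate_vma_finalize_and_map(&args, state);
//         }
//
//         migrate_vma_finalize(&args);
//
//         return state->status;
//     }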

// Populates the given VA range and tries to migrate all the pages to dst_id. If
// the destination processor is the CPU, the NUMA node in dst_node_id is used.
// The input VA range must be fully backed by VMAs. This function relies on
// migrate_vma, which was added in Linux 4.14. If skip_mapped is set to true,
// already mapped pages will not be migrated. For kernels that do not provide
// migrate_vma, this function populates the memory using get_user_pages() and
// returns NV_WARN_NOTHING_TO_DO so that API calls made from user space can
// complete the migration there. Kernel callers are expected to handle this
// error according to their respective use cases. user_space_start and
// user_space_length will contain the full input range. skip_mapped is ignored
// on such kernels.
//
// If the destination is the CPU and dst_node_id is full,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned and user space will call
// UVM_MIGRATE with the next preferred CPU node (if more are available),
// starting at the address specified by user_space_start. If the destination is
// a GPU and a page could not be populated, NV_ERR_NO_MEMORY is returned.
// Otherwise, NV_OK is returned. This is fine because UvmMigrate/UvmMigrateAsync
// only guarantee that the memory is populated somewhere in the system, not that
// the pages have moved to the requested processor.
//
// migrate_vma does not support file-backed vmas yet. If a file-backed vma is
// found, this function populates the pages corresponding to the vma with
// get_user_pages() and returns NV_WARN_NOTHING_TO_DO. The caller is expected to
// handle this error: API calls will fall back to user mode to complete the
// migration, and kernel callers are expected to handle the error according to
// their respective use cases. If NV_WARN_NOTHING_TO_DO is returned,
// user_space_start and user_space_length will contain the intersection of the
// vma address range and [start:start + length].
//
// If a user-mode fallback is required but current->mm != uvm_migrate_args->mm,
// NV_ERR_NOT_SUPPORTED is returned since user mode can't perform such a
// migration. If the caller is a kernel thread, such as the GPU BH, this check
// is skipped since the caller is not expected to take such a fallback.
//
// Also, if no GPUs have been registered in the VA space, this function
// populates the pages corresponding to the first vma in the requested region
// using get_user_pages() and returns NV_WARN_NOTHING_TO_DO so that user-space
// API callers fall back to user space to complete the whole migration. Kernel
// callers are expected to handle this error according to their respective use
// cases.
//
// If touch is true, a touch will be attempted on all pages in the requested
// range. All pages are only guaranteed to have been touched if
// NV_WARN_NOTHING_TO_DO or NV_OK is returned.
//
// Locking: mmap_lock must be held in read or write mode.
NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
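
// Illustrative caller sketch (not part of this header): filling in
// uvm_migrate_args_t and handling the documented return codes. The function and
// the UVM_POPULATE_PERMISSIONS_INHERIT value are assumptions used only to show
// the expected calling convention; mmap_lock handling is left to the caller.
//
//     static NV_STATUS migrate_region_example(uvm_va_space_t *va_space,
//                                             struct mm_struct *mm,
//                                             unsigned long start,
//                                             unsigned long length,
//                                             uvm_processor_id_t dst_id,
//                                             int dst_node_id)
//     {
//         NvU64 user_space_start = 0;
//         NvU64 user_space_length = 0;
//         NV_STATUS status;
//
//         uvm_migrate_args_t uvm_migrate_args = {
//             .va_space             = va_space,
//             .mm                   = mm,
//             .start                = start,
//             .length               = length,
//             .dst_id               = dst_id,
//             .dst_node_id          = dst_node_id,
//             .populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
//             .touch                = false,
//             .skip_mapped          = false,
//             .user_space_start     = &user_space_start,
//             .user_space_length    = &user_space_length,
//         };
//
//         // mmap_lock must already be held in read or write mode here
//         status = uvm_migrate_pageable(&uvm_migrate_args);
//
//         if (status == NV_WARN_NOTHING_TO_DO) {
//             // [user_space_start, user_space_start + user_space_length) was
//             // only populated; a user-space caller finishes the migration.
//         }
//         else if (status == NV_ERR_MORE_PROCESSING_REQUIRED) {
//             // dst_node_id was full; user space retries from user_space_start
//             // with the next preferred CPU NUMA node, if any.
//         }
//
//         return status;
//     }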

NV_STATUS uvm_migrate_pageable_init(void);

void uvm_migrate_pageable_exit(void);

#else // UVM_MIGRATE_VMA_SUPPORTED

static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
{
    NV_STATUS status;

    if (current->mm != uvm_migrate_args->mm && !(current->flags & PF_KTHREAD))
        return NV_ERR_NOT_SUPPORTED;

    status = uvm_populate_pageable(uvm_migrate_args->mm,
                                   uvm_migrate_args->start,
                                   uvm_migrate_args->length,
                                   0,
                                   uvm_migrate_args->touch,
                                   uvm_migrate_args->populate_permissions);
    if (status != NV_OK)
        return status;

    *(uvm_migrate_args->user_space_start) = uvm_migrate_args->start;
    *(uvm_migrate_args->user_space_length) = uvm_migrate_args->length;

    return NV_WARN_NOTHING_TO_DO;
}

static NV_STATUS uvm_migrate_pageable_init(void)
{
    return NV_OK;
}

static void uvm_migrate_pageable_exit(void)
{
}

#endif // UVM_MIGRATE_VMA_SUPPORTED

#endif // __UVM_MIGRATE_PAGEABLE_H__