/*******************************************************************************
    Copyright (c) 2018 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_MIGRATE_PAGEABLE_H__
#define __UVM_MIGRATE_PAGEABLE_H__

#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_populate_pageable.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"

typedef struct
{
    uvm_va_space_t                  *va_space;
    struct mm_struct                *mm;
    const unsigned long             start;
    const unsigned long             length;
    uvm_processor_id_t              dst_id;

    // dst_node_id may be clobbered by uvm_migrate_pageable().
    int                             dst_node_id;
    uvm_populate_permissions_t      populate_permissions;
    bool                            touch : 1;
    bool                            skip_mapped : 1;
    bool                            populate_on_cpu_alloc_failures : 1;
    NvU64                           *user_space_start;
    NvU64                           *user_space_length;
} uvm_migrate_args_t;
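
// Below is an illustrative sketch (not part of the API) of how a caller might
// fill in this structure before calling uvm_migrate_pageable(). va_space, mm,
// base, len, out_start and out_length are hypothetical caller-provided values,
// and the destination/permission choices are just examples:
//
//     NvU64 out_start, out_length;
//
//     uvm_migrate_args_t uvm_migrate_args =
//     {
//         .va_space                       = va_space,
//         .mm                             = mm,
//         .start                          = base,
//         .length                         = len,
//         .dst_id                         = UVM_ID_CPU,
//         .dst_node_id                    = numa_node_id(),
//         .populate_permissions           = UVM_POPULATE_PERMISSIONS_INHERIT,
//         .touch                          = false,
//         .skip_mapped                    = false,
//         .populate_on_cpu_alloc_failures = true,
//         .user_space_start               = &out_start,
//         .user_space_length              = &out_length,
//     };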

#if defined(CONFIG_MIGRATE_VMA_HELPER)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#else
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#endif
#endif

#ifdef UVM_MIGRATE_VMA_SUPPORTED
#include <linux/migrate.h>

// The calls to migrate_vma are capped at 512 pages to set an upper bound on the
// amount of metadata that needs to be allocated for the operation. This number
// was chosen because performance seems to plateau at this size on kernels with
// 64K pages. On kernels with PAGE_SIZE == 4K, 512 pages correspond to a 2M VA
// block, which is also a standard size for batch operations.
#define UVM_MIGRATE_VMA_MAX_PAGES (512UL)
#define UVM_MIGRATE_VMA_MAX_SIZE (UVM_MIGRATE_VMA_MAX_PAGES * PAGE_SIZE)
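
// For illustration only: a migration of a large range is expected to proceed
// in batches of at most UVM_MIGRATE_VMA_MAX_SIZE bytes so that the per-call
// metadata (the pfn arrays and page masks in migrate_vma_state_t below) stays
// bounded. A hypothetical outer loop over [range_start, range_end) might look
// like:
//
//     unsigned long addr = range_start;
//
//     while (addr < range_end) {
//         unsigned long next = min(addr + UVM_MIGRATE_VMA_MAX_SIZE, range_end);
//
//         // One migrate_vma batch covering [addr, next)
//
//         addr = next;
//     }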

typedef struct
{
    // Input parameters
    uvm_migrate_args_t  *uvm_migrate_args;

    // Output parameters
    //
    // Error code. This only signals errors in internal UVM operations.
    // Pages that failed allocation or could not be populated are communicated
    // using the fields below.
    NV_STATUS           status;

    // Mask of pages that couldn't be made resident on the destination because:
    // (a) the pages are backed with data but are not populated (e.g. swapped
    //     out);
    // (b) the pages are not backed with any data yet and were not populated
    //     because the vma is not READ_WRITE, so populating them would not
    //     charge them to the process properly;
    // (c) the pages are already mapped and skip_mapped requested that such
    //     pages not be migrated; or
    // (d) the pages couldn't be migrated by the kernel.
    DECLARE_BITMAP(populate_pages_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Mask of pages that failed allocation on the destination
    DECLARE_BITMAP(allocation_failed_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Mask of pages which are already resident at the destination.
    DECLARE_BITMAP(dst_resident_pages_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Global state managed by the caller
    //
    // These are scratch masks that can be used by the migrate_vma caller to
    // save output page masks and orchestrate the migrate_vma
    // retries/population calls if needed.
    DECLARE_BITMAP(scratch1_mask, UVM_MIGRATE_VMA_MAX_PAGES);
    DECLARE_BITMAP(scratch2_mask, UVM_MIGRATE_VMA_MAX_PAGES);

    // Arrays used by migrate_vma to store the src/dst pfns
    unsigned long dst_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];
    unsigned long src_pfn_array[UVM_MIGRATE_VMA_MAX_PAGES];

    // Internal state
    //
    uvm_tracker_t tracker;

    struct {
        // Array of page IOMMU mappings created during allocate_and_copy.
        // Required when using SYS aperture. They are freed in
        // finalize_and_map. Also keep an array with the GPUs for which the
        // mapping was created.
        NvU64               addrs[UVM_MIGRATE_VMA_MAX_PAGES];
        uvm_gpu_t          *addrs_gpus[UVM_MIGRATE_VMA_MAX_PAGES];

        // Mask of pages with entries in the dma address arrays above
        DECLARE_BITMAP(page_mask, UVM_MIGRATE_VMA_MAX_PAGES);

        // Number of pages for which IOMMU mappings were created
        unsigned long num_pages;
    } dma;

    // Processors where pages are resident before calling migrate_vma
    uvm_processor_mask_t src_processors;

    // Array of per-processor page masks with the pages that are resident
    // before calling migrate_vma.
    struct {
        DECLARE_BITMAP(page_mask, UVM_MIGRATE_VMA_MAX_PAGES);
    } processors[UVM_ID_MAX_PROCESSORS];

    // Number of pages in the migrate_vma call
    unsigned long num_pages;

    // Number of pages that are directly populated on the destination
    unsigned long num_populate_anon_pages;
} migrate_vma_state_t;
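
// Illustrative sketch (not the driver's actual retry logic) of how the output
// masks above could be combined by the migrate_vma caller to find the pages
// that still need attention after a pass:
//
//     // Pages that could not be collected/migrated plus pages that failed
//     // allocation on the destination.
//     bitmap_or(state->scratch1_mask,
//               state->populate_pages_mask,
//               state->allocation_failed_mask,
//               state->num_pages);
//
//     if (!bitmap_empty(state->scratch1_mask, state->num_pages)) {
//         // Populate the remaining pages (e.g. with get_user_pages()) and,
//         // if appropriate, retry the migration for just those pages.
//     }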

#if defined(CONFIG_MIGRATE_VMA_HELPER)
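// Minimal container used to pass the migration parameters to the helper
// callbacks below on CONFIG_MIGRATE_VMA_HELPER kernels.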
struct migrate_vma {
    struct vm_area_struct   *vma;
    unsigned long           *dst;
    unsigned long           *src;
    unsigned long           start;
    unsigned long           end;
};

void uvm_migrate_vma_alloc_and_copy_helper(struct vm_area_struct *vma,
                                           const unsigned long *src,
                                           unsigned long *dst,
                                           unsigned long start,
                                           unsigned long end,
                                           void *private);

void uvm_migrate_vma_finalize_and_map_helper(struct vm_area_struct *vma,
                                             const unsigned long *src,
                                             const unsigned long *dst,
                                             unsigned long start,
                                             unsigned long end,
                                             void *private);
#else
void uvm_migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_state_t *state);
void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_state_t *state);
#endif // CONFIG_MIGRATE_VMA_HELPER
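
// Rough sketch of how the two flavors above plug into the kernel. On
// CONFIG_MIGRATE_VMA_HELPER kernels (the Linux 4.14-era interface), the helpers
// would be registered as migrate_vma_ops callbacks with migrate_vma_state_t
// threaded through the private pointer; on newer kernels,
// uvm_migrate_vma_alloc_and_copy()/uvm_migrate_vma_finalize_and_map() are
// called around migrate_vma_setup()/migrate_vma_pages()/migrate_vma_finalize()
// instead. Illustrative only, assuming the legacy helper API:
//
//     static const struct migrate_vma_ops uvm_migrate_vma_ops =
//     {
//         .alloc_and_copy   = uvm_migrate_vma_alloc_and_copy_helper,
//         .finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
//     };
//
//     migrate_vma(&uvm_migrate_vma_ops, vma, start, end,
//                 state->src_pfn_array, state->dst_pfn_array, state);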

// Populates the given VA range and tries to migrate all the pages to dst_id.
// If the destination processor is the CPU, the NUMA node in dst_node_id is
// used. The input VA range must be fully backed by VMAs. This function relies
// on migrate_vma, which was added in Linux 4.14. If skip_mapped is true,
// already-mapped pages are not migrated. For kernels that do not provide
// migrate_vma, this function populates the memory using get_user_pages and
// returns NV_WARN_NOTHING_TO_DO so that API calls made from user space can
// complete the migration there. Kernel callers are expected to handle this
// error according to their respective use cases. user_space_start and
// user_space_length will contain the full input range. skip_mapped is ignored
// on such kernels. If the destination is the CPU and dst_node_id is full,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned and user space will call
// UVM_MIGRATE with the next preferred CPU node (if more are available),
// starting at the address specified by user_space_start. If the destination is
// a GPU and a page could not be populated, NV_ERR_NO_MEMORY is returned.
// Otherwise, NV_OK is returned. This is fine because UvmMigrate/UvmMigrateAsync
// only guarantee that the memory is populated somewhere in the system, not that
// the pages moved to the requested processor.
//
// migrate_vma does not support file-backed vmas yet. If a file-backed vma is
// found, this function populates the pages corresponding to the vma with
// get_user_pages() and returns NV_WARN_NOTHING_TO_DO. The caller is expected
// to handle this error: API calls fall back to user mode to complete the
// migration, and kernel callers handle the error according to their respective
// use cases. If NV_WARN_NOTHING_TO_DO is returned, user_space_start and
// user_space_length will contain the intersection of the vma address range and
// [start:start + length].
//
// If a user-mode fallback is required but current->mm != uvm_migrate_args->mm,
// NV_ERR_NOT_SUPPORTED is returned, since user mode can't perform such a
// migration. If the caller is a kernel thread, such as the GPU BH, this check
// is skipped, since the caller is not expected to take such a fallback.
//
// Also, if no GPUs have been registered in the VA space, this function
// populates the pages corresponding to the first vma in the requested region
// using get_user_pages() and returns NV_WARN_NOTHING_TO_DO so that user-space
// API callers fall back to user space to complete the whole migration. Kernel
// callers are expected to handle this error according to their respective use
// cases.
//
// If touch is true, a touch is attempted on all pages in the requested range.
// All pages are only guaranteed to have been touched if NV_WARN_NOTHING_TO_DO
// or NV_OK is returned.
//
// Locking: mmap_lock must be held in read or write mode.
NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
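
// Illustrative sketch of caller-side handling of the return codes described
// above (args is a hypothetical, fully initialized uvm_migrate_args_t):
//
//     status = uvm_migrate_pageable(&args);
//     if (status == NV_WARN_NOTHING_TO_DO) {
//         // Memory was populated but not migrated. User-space callers finish
//         // the migration themselves over the range reported in
//         // *args.user_space_start / *args.user_space_length.
//     }
//     else if (status == NV_ERR_MORE_PROCESSING_REQUIRED) {
//         // The destination CPU NUMA node was full: retry with the next
//         // preferred node, starting at *args.user_space_start.
//     }
//     else if (status != NV_OK) {
//         // Fatal error, e.g. NV_ERR_NO_MEMORY for a GPU destination.
//     }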

NV_STATUS uvm_migrate_pageable_init(void);

void uvm_migrate_pageable_exit(void);
#else // UVM_MIGRATE_VMA_SUPPORTED

static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
{
    NV_STATUS status;

    if (current->mm != uvm_migrate_args->mm && !(current->flags & PF_KTHREAD))
        return NV_ERR_NOT_SUPPORTED;

    status = uvm_populate_pageable(uvm_migrate_args->mm,
                                   uvm_migrate_args->start,
                                   uvm_migrate_args->length,
                                   0,
                                   uvm_migrate_args->touch,
                                   uvm_migrate_args->populate_permissions);
    if (status != NV_OK)
        return status;

    *(uvm_migrate_args->user_space_start) = uvm_migrate_args->start;
    *(uvm_migrate_args->user_space_length) = uvm_migrate_args->length;

    return NV_WARN_NOTHING_TO_DO;
}

static NV_STATUS uvm_migrate_pageable_init(void)
{
    return NV_OK;
}

static void uvm_migrate_pageable_exit(void)
{
}

#endif // UVM_MIGRATE_VMA_SUPPORTED

#endif // __UVM_MIGRATE_PAGEABLE_H__