1 /*
2 * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
3 *
4 * LICENSE_BEGIN
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 *
38 * LICENSE_END
39 *
40 *
41 */
42
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <fcntl.h>
47 #include <errno.h>
48 #include <unistd.h>
49 #include <sys/mman.h>
50
51 #include "usnic_direct.h"
52 #include "usd.h"
53 #include "usd_ib_cmd.h"
54
55 /*
56 * Issue driver command to register memory region
57 */
58 int
usd_reg_mr(struct usd_device * dev,void * vaddr,size_t length,struct usd_mr ** mr_o)59 usd_reg_mr(
60 struct usd_device *dev,
61 void *vaddr,
62 size_t length,
63 struct usd_mr **mr_o)
64 {
65 struct usd_mr *mr;
66 int ret;
67
68 mr = calloc(sizeof(*mr), 1);
69 if (mr == NULL) {
70 return -errno;
71 }
72
73 ret = usd_ib_cmd_reg_mr(dev, vaddr, length, mr);
74
75 if (ret == 0) {
76 mr->umr_dev = dev;
77 mr->umr_vaddr = vaddr;
78 mr->umr_length = length;
79 *mr_o = mr;
80 } else {
81 free(mr);
82 }
83
84 return ret;
85 }
86
87 /*
88 * Issue driver command to de-register memory region
89 */
90 int
usd_dereg_mr(struct usd_mr * mr)91 usd_dereg_mr(
92 struct usd_mr *mr)
93 {
94 int ret;
95
96 ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr);
97 if (ret == 0)
98 free(mr);
99
100 return ret;
101 }
102
103 /*
104 * Used to allocate memory and an mr to go with it all in one go. Used
105 * to provide memory to the vnic_* functions that call pci_alloc_consistant
106 * We want to return a nicely aligned chunk of memory preceded by struct usd_mr.
107 * We don't know the alignment of the memory we get back, so allocate a big
108 * enough chunk to hold the following:
109 * struct usd_mr
110 * N pad bytes
111 * true length and pointer to usd_mr
112 * page aligned buffer for user
113 */
114 int
usd_alloc_mr(struct usd_device * dev,size_t size,void ** vaddr_o)115 usd_alloc_mr(
116 struct usd_device *dev,
117 size_t size,
118 void **vaddr_o)
119 {
120 void *vaddr;
121 void *base_addr;
122 struct usd_mr *mr;
123 size_t true_size;
124 size_t metadata_size;
125 size_t madv_size;
126 int ret;
127
128 metadata_size = sizeof(struct usd_mr) + 3 * sizeof(uintptr_t);
129 madv_size = ALIGN(size, sysconf(_SC_PAGESIZE));
130 true_size = madv_size + metadata_size + sysconf(_SC_PAGESIZE) - 1;
131 base_addr = mmap(NULL, true_size, PROT_READ | PROT_WRITE,
132 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
133 if (base_addr == NULL || base_addr == MAP_FAILED) {
134 usd_err("Failed to mmap region of size %lu\n", true_size);
135 return -errno;
136 }
137 mr = base_addr;
138 vaddr =
139 (void *) ALIGN((uintptr_t) base_addr + metadata_size,
140 sysconf(_SC_PAGESIZE));
141 ((uintptr_t *) vaddr)[-1] = (uintptr_t) mr;
142 ((uintptr_t *) vaddr)[-2] = true_size;
143 ((uintptr_t *) vaddr)[-3] = madv_size;
144
145 /*
146 * Disable copy-on-write for memories internally used by USD.
147 * For application buffers, disabling copy-on-write should be provided by
148 * usd wrapper such as libfabric or verbs plugin if fork is supported.
149 * The memory to be registered starts from page-aligned address, and ends
150 * at page boundary, so it's impossible for a page to be updated
151 * with multiple madvise calls when each call reference different VAs on
152 * the same page. This allows to avoid the need to reference count
153 * the pages that get updated with mutiple madvise calls. For details,
154 * see libibverbs ibv_dont_forkrange implementations.
155 */
156 ret = madvise(vaddr, madv_size, MADV_DONTFORK);
157 if (ret != 0) {
158 usd_err("Failed to disable child's access to memory %p size %lu\n",
159 vaddr, size);
160 ret = errno;
161 goto err_unmap;
162 }
163
164 ret = usd_ib_cmd_reg_mr(dev, vaddr, size, mr);
165 if (ret != 0) {
166 usd_err("Failed to register memory region %p, size %lu\n",
167 vaddr, size);
168 goto err_madvise;
169 }
170 mr->umr_dev = dev;
171
172 *vaddr_o = vaddr;
173 return 0;
174
175 err_madvise:
176 madvise(vaddr, ALIGN(size, sysconf(_SC_PAGESIZE)), MADV_DOFORK);
177 err_unmap:
178 munmap(base_addr, true_size);
179 return ret;
180 }
181
182 /*
183 * See usd_alloc_mr() for explanation of:
184 * mr = (struct usd_mr *)((uintptr_t *)vaddr)[-1];
185 */
186 int
usd_free_mr(void * vaddr)187 usd_free_mr(
188 void *vaddr)
189 {
190 struct usd_mr *mr;
191 size_t true_size;
192 size_t madv_size;
193 int ret;
194
195 mr = (struct usd_mr *) ((uintptr_t *) vaddr)[-1];
196 true_size = ((uintptr_t *) vaddr)[-2];
197 madv_size = ((uintptr_t *) vaddr)[-3];
198
199 ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr);
200 if (ret == 0) {
201 madvise(vaddr, madv_size, MADV_DOFORK);
202 munmap(mr, true_size);
203 }
204
205 return ret;
206 }
207
208 /*
209 * Utility function for vnic_* routines
210 */
211 char *
pci_name(struct pci_dev * pdev)212 pci_name(
213 struct pci_dev *pdev)
214 {
215 struct usd_device *dev;
216
217 dev = (struct usd_device *) pdev;
218
219 return dev->ud_ctx->ucx_ib_dev->id_usnic_name;
220 }
221