1 /*
2  * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
3  *
4  * LICENSE_BEGIN
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  *
38  * LICENSE_END
39  *
40  *
41  */
42 
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <fcntl.h>
47 #include <errno.h>
48 #include <unistd.h>
49 #include <sys/mman.h>
50 
51 #include "usnic_direct.h"
52 #include "usd.h"
53 #include "usd_ib_cmd.h"
54 
55 /*
56  * Issue driver command to register memory region
57  */
58 int
usd_reg_mr(struct usd_device * dev,void * vaddr,size_t length,struct usd_mr ** mr_o)59 usd_reg_mr(
60     struct usd_device *dev,
61     void *vaddr,
62     size_t length,
63     struct usd_mr **mr_o)
64 {
65     struct usd_mr *mr;
66     int ret;
67 
68     mr = calloc(sizeof(*mr), 1);
69     if (mr == NULL) {
70         return -errno;
71     }
72 
73     ret = usd_ib_cmd_reg_mr(dev, vaddr, length, mr);
74 
75     if (ret == 0) {
76         mr->umr_dev = dev;
77         mr->umr_vaddr = vaddr;
78         mr->umr_length = length;
79         *mr_o = mr;
80     } else {
81         free(mr);
82     }
83 
84     return ret;
85 }
86 
87 /*
88  * Issue driver command to de-register memory region
89  */
90 int
usd_dereg_mr(struct usd_mr * mr)91 usd_dereg_mr(
92     struct usd_mr *mr)
93 {
94     int ret;
95 
96     ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr);
97     if (ret == 0)
98         free(mr);
99 
100     return ret;
101 }
102 
103 /*
104  * Used to allocate memory and an mr to go with it all in one go.  Used
105  * to provide memory to the vnic_* functions that call pci_alloc_consistant
106  * We want to return a nicely aligned chunk of memory preceded by struct usd_mr.
107  * We don't know the alignment of the memory we get back, so allocate a big
108  * enough chunk to hold the following:
109  *   struct usd_mr
110  *   N pad bytes
111  *   true length and pointer to usd_mr
112  *   page aligned buffer for user
113  */
114 int
usd_alloc_mr(struct usd_device * dev,size_t size,void ** vaddr_o)115 usd_alloc_mr(
116     struct usd_device *dev,
117     size_t size,
118     void **vaddr_o)
119 {
120     void *vaddr;
121     void *base_addr;
122     struct usd_mr *mr;
123     size_t true_size;
124     size_t metadata_size;
125     size_t madv_size;
126     int ret;
127 
128     metadata_size = sizeof(struct usd_mr) + 3 * sizeof(uintptr_t);
129     madv_size = ALIGN(size, sysconf(_SC_PAGESIZE));
130     true_size = madv_size + metadata_size + sysconf(_SC_PAGESIZE) - 1;
131     base_addr = mmap(NULL, true_size, PROT_READ | PROT_WRITE,
132                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
133     if (base_addr == NULL || base_addr == MAP_FAILED) {
134         usd_err("Failed to mmap region of size %lu\n", true_size);
135         return -errno;
136     }
137     mr = base_addr;
138     vaddr =
139         (void *) ALIGN((uintptr_t) base_addr + metadata_size,
140                        sysconf(_SC_PAGESIZE));
141     ((uintptr_t *) vaddr)[-1] = (uintptr_t) mr;
142     ((uintptr_t *) vaddr)[-2] = true_size;
143     ((uintptr_t *) vaddr)[-3] = madv_size;
144 
145     /*
146      * Disable copy-on-write for memories internally used by USD.
147      * For application buffers, disabling copy-on-write should be provided by
148      * usd wrapper such as libfabric or verbs plugin if fork is supported.
149      * The memory to be registered starts from page-aligned address, and ends
150      * at page boundary, so it's impossible for a page to be updated
151      * with multiple madvise calls when each call reference different VAs on
152      * the same page. This allows to avoid the need to reference count
153      * the pages that get updated with mutiple madvise calls. For details,
154      * see libibverbs ibv_dont_forkrange implementations.
155      */
156     ret = madvise(vaddr, madv_size, MADV_DONTFORK);
157     if (ret != 0) {
158         usd_err("Failed to disable child's access to memory %p size %lu\n",
159                 vaddr, size);
160         ret = errno;
161         goto err_unmap;
162     }
163 
164     ret = usd_ib_cmd_reg_mr(dev, vaddr, size, mr);
165     if (ret != 0) {
166         usd_err("Failed to register memory region %p, size %lu\n",
167                 vaddr, size);
168         goto err_madvise;
169     }
170     mr->umr_dev = dev;
171 
172     *vaddr_o = vaddr;
173     return 0;
174 
175 err_madvise:
176     madvise(vaddr, ALIGN(size, sysconf(_SC_PAGESIZE)), MADV_DOFORK);
177 err_unmap:
178     munmap(base_addr, true_size);
179     return ret;
180 }
181 
182 /*
183  * See usd_alloc_mr() for explanation of:
184  *  mr = (struct usd_mr *)((uintptr_t *)vaddr)[-1];
185  */
186 int
usd_free_mr(void * vaddr)187 usd_free_mr(
188     void *vaddr)
189 {
190     struct usd_mr *mr;
191     size_t true_size;
192     size_t madv_size;
193     int ret;
194 
195     mr = (struct usd_mr *) ((uintptr_t *) vaddr)[-1];
196     true_size = ((uintptr_t *) vaddr)[-2];
197     madv_size = ((uintptr_t *) vaddr)[-3];
198 
199     ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr);
200     if (ret == 0) {
201         madvise(vaddr, madv_size, MADV_DOFORK);
202         munmap(mr, true_size);
203     }
204 
205     return ret;
206 }
207 
208 /*
209  * Utility function for vnic_* routines
210  */
211 char *
pci_name(struct pci_dev * pdev)212 pci_name(
213     struct pci_dev *pdev)
214 {
215     struct usd_device *dev;
216 
217     dev = (struct usd_device *) pdev;
218 
219     return dev->ud_ctx->ucx_ib_dev->id_usnic_name;
220 }
221