1 /*
2 * Copyright 2014-2019, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * * Neither the name of the copyright holder nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <ctype.h>
38 #include <inttypes.h>
39 #include <dirent.h>
40
41 #include <ofi_osd.h>
42 #include <ofi.h>
43 #include <rdma/fi_errno.h>
44 #include <ofi_mem.h>
45 #include <rdma/fabric.h>
46
47
/*
 * Byte granularity used by the pmem_commit_*() loops in this file: the
 * start address is rounded down to a multiple of this value and the loops
 * advance by it on every iteration.
 */
48 static const int OFI_CACHE_SIZE = 64;
49
/*
 * Table of page sizes built by ofi_mem_init() and released by
 * ofi_mem_fini().  Slot OFI_PAGE_SIZE holds the value returned by
 * ofi_get_page_size(); slot OFI_DEF_HUGEPAGE_SIZE holds the value returned
 * by ofi_get_hugepage_size() when that value is positive.  Remaining slots
 * hold other sizes parsed from /sys/kernel/mm/hugepages entries.
 */
50 size_t *page_sizes = NULL;
/* Number of populated entries in page_sizes[]. */
51 size_t num_page_sizes = 0;
52
53
ofi_mem_init(void)54 void ofi_mem_init(void)
55 {
56 struct dirent **pglist = NULL;
57 size_t max_cnt;
58 ssize_t hpsize;
59 long psize;
60 int n;
61
62 psize = ofi_get_page_size();
63 if (psize < 0)
64 return;
65
66 hpsize = ofi_get_hugepage_size();
67 if (hpsize > 0) {
68 n = scandir("/sys/kernel/mm/hugepages", &pglist, NULL, NULL);
69 max_cnt = (n < 0) ? 2 : n + 1;
70 } else {
71 max_cnt = 1;
72 n = 0;
73 }
74
75 page_sizes = calloc(max_cnt, sizeof(*page_sizes));
76 if (!page_sizes)
77 goto free_list;
78
79 page_sizes[OFI_PAGE_SIZE] = psize;
80 if (hpsize > 0) {
81 page_sizes[OFI_DEF_HUGEPAGE_SIZE] = hpsize;
82 num_page_sizes = 2;
83 } else {
84 num_page_sizes = 1;
85 }
86
87 while (n-- > 0) {
88 if (sscanf(pglist[n]->d_name, "hugepages-%zukB", &hpsize) == 1) {
89 hpsize *= 1024;
90 if (hpsize != page_sizes[OFI_DEF_HUGEPAGE_SIZE])
91 page_sizes[num_page_sizes++] = hpsize;
92 }
93 free(pglist[n]);
94 }
95
96 free_list:
97 while (n-- > 0)
98 free(pglist[n]);
99 free(pglist);
100 }
101
ofi_mem_fini(void)102 void ofi_mem_fini(void)
103 {
104 free(page_sizes);
105 }
106
/*
 * Return ofi_sysconf(_SC_PHYS_PAGES) multiplied by ofi_get_page_size(),
 * i.e. the physical memory size in bytes.
 *
 * Returns 0 when either query yields a value <= 0, or when the product
 * cannot be represented in a size_t.
 */
size_t ofi_get_mem_size(void)
{
	long page_cnt, page_size;

	page_cnt = ofi_sysconf(_SC_PHYS_PAGES);
	page_size = ofi_get_page_size();

	if (page_cnt <= 0 || page_size <= 0)
		return 0;

	/*
	 * Detect overflow before multiplying.  Comparing the (possibly
	 * wrapped) product against its operands is not sufficient: a wrapped
	 * result can still be larger than both factors.
	 */
	if ((size_t) page_cnt > SIZE_MAX / (size_t) page_size)
		return 0;

	return (size_t) page_cnt * (size_t) page_size;
}
124
125
/*
 * Set to FI_RMA_PMEM by ofi_pmem_init() once a commit routine has been
 * selected; not assigned anywhere else in this file.
 */
126 uint64_t OFI_RMA_PMEM;
/*
 * Commit routine chosen by ofi_pmem_init() from the pmem_commit_*()
 * implementations below; ofi_pmem_init() leaves it untouched when none of
 * the probed CPU features is reported.
 */
127 void (*ofi_pmem_commit)(const void *addr, size_t len);
128
pmem_commit_clwb(const void * addr,size_t len)129 static void pmem_commit_clwb(const void *addr, size_t len)
130 {
131 uintptr_t uptr;
132
133 for (uptr = (uintptr_t) addr & ~(OFI_CACHE_SIZE - 1);
134 uptr < (uintptr_t) addr + len; uptr += OFI_CACHE_SIZE) {
135 ofi_clwb(uptr);
136 }
137 ofi_sfence();
138 }
139
pmem_commit_clflushopt(const void * addr,size_t len)140 static void pmem_commit_clflushopt(const void *addr, size_t len)
141 {
142 uintptr_t uptr;
143
144 for (uptr = (uintptr_t) addr & ~(OFI_CACHE_SIZE - 1);
145 uptr < (uintptr_t) addr + len; uptr += OFI_CACHE_SIZE) {
146 ofi_clflushopt(uptr);
147 }
148 ofi_sfence();
149 }
150
pmem_commit_clflush(const void * addr,size_t len)151 static void pmem_commit_clflush(const void *addr, size_t len)
152 {
153 uintptr_t uptr;
154
155 for (uptr = (uintptr_t) addr & ~(OFI_CACHE_SIZE - 1);
156 uptr < (uintptr_t) addr + len; uptr += OFI_CACHE_SIZE) {
157 ofi_clflush(uptr);
158 }
159 }
160
ofi_pmem_init(void)161 void ofi_pmem_init(void)
162 {
163 if (ofi_cpu_supports(0x7, OFI_CLWB_REG, OFI_CLWB_BIT)) {
164 ofi_pmem_commit = pmem_commit_clwb;
165 } else if (ofi_cpu_supports(0x7, OFI_CLFLUSHOPT_REG,
166 OFI_CLFLUSHOPT_BIT)) {
167 ofi_pmem_commit = pmem_commit_clflushopt;
168 } else if (ofi_cpu_supports(0x1, OFI_CLFLUSH_REG, OFI_CLFLUSH_BIT)) {
169 ofi_pmem_commit = pmem_commit_clflush;
170 }
171
172 if (ofi_pmem_commit)
173 OFI_RMA_PMEM = FI_RMA_PMEM;
174 }
175