1 /*
2 * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
3 * Copyright (c) 2015-2017 Los Alamos National Security, LLC.
4 * All rights reserved.
5 * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36 #if HAVE_CONFIG_H
37 #include <config.h>
38 #endif /* HAVE_CONFIG_H */
39
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <netdb.h>
43 #include <netinet/in.h>
44 #include <netinet/tcp.h>
45 #include <poll.h>
46 #include <stdarg.h>
47 #include <stddef.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <sys/select.h>
51 #include <sys/socket.h>
52 #include <sys/types.h>
53 #include <sys/time.h>
54 #include <unistd.h>
55 #include <stdlib.h>
56 #include <linux/limits.h>
57 #include <sys/syscall.h>
58
59 #include "alps/alps.h"
60 #include "alps/alps_toolAssist.h"
61 #include "alps/libalpsutil.h"
62 #include "alps/libalpslli.h"
63
64 #include "gnix.h"
65 #include "gnix_util.h"
66
/* True once __gnix_app_init() has completed successfully (CCM or ALPS path). */
static bool app_init;
/* Filled in by __gnix_app_init */
static uint8_t gnix_app_ptag;		/* GNI protection tag (RDMA credential) */
static uint32_t gnix_app_cookie;	/* GNI cookie paired with the ptag */
static uint32_t gnix_pes_on_node;	/* number of PEs running on this node */
static int gnix_pe_node_rank = -1;	/* this PE's rank within the node; -1 if unknown */
#if HAVE_CRITERION
int gnix_first_pe_on_node; /* globally visible for criterion */
#else
static int gnix_first_pe_on_node;	/* global rank of the node's first PE */
#endif
/* CCM/ccmlogin specific stuff */
static bool ccm_init;	/* true once __gnix_ccm_init() has succeeded */
/* This file provides ccm_alps_info */
#define CCM_ALPS_INFO_FILE "/tmp/ccm_alps_info"
/* On-disk layout of CCM_ALPS_INFO_FILE: version followed by the credentials. */
typedef struct ccm_alps_info {
	uint32_t version;
	uint8_t ptag;
	uint32_t cookie;
} ccm_alps_info_t;
/* Format for the nodelist filename: $HOME/.crayccm/ccmnodlist.<WLM jobid> */
#define CCM_NODELIST_FN ".crayccm/ccm_nodelist."
/* alps specific stuff */
static uint64_t gnix_apid;		/* ALPS application id */
static alpsAppLayout_t gnix_appLayout;	/* filled by alps_get_placement_info() */
static uint32_t gnix_device_id;		/* GNI device id; set to 0 in __gnix_app_init */
static int gnix_cq_limit;		/* cached CQ job-resource limit */
/* These are not used currently and could be static to gnix_alps_init */
static int alps_init;			/* nonzero once __gnix_alps_init() succeeded */
static int *gnix_app_placementList;
static int *gnix_app_targetNids;
static int *gnix_app_targetPes;
static int *gnix_app_targetLen;
static struct in_addr *gnix_app_targetIps;
static int *gnix_app_startPe;
static int *gnix_app_totalPes;
static int *gnix_app_nodePes;
static int *gnix_app_peCpus;

/* Serializes __gnix_alps_init() across threads. */
fastlock_t __gnix_alps_lock;
107
_gnix_get_cq_limit(void)108 int _gnix_get_cq_limit(void)
109 {
110 return gnix_cq_limit;
111 }
112
__gnix_ccm_cleanup(void)113 static inline void __gnix_ccm_cleanup(void)
114 {
115 ccm_init = false;
116 }
117
__gnix_alps_cleanup(void)118 static inline void __gnix_alps_cleanup(void)
119 {
120 alps_app_lli_lock();
121
122 if (gnix_app_placementList)
123 free(gnix_app_placementList);
124 if (gnix_app_targetNids)
125 free(gnix_app_targetNids);
126 if (gnix_app_targetPes)
127 free(gnix_app_targetPes);
128 if (gnix_app_targetLen)
129 free(gnix_app_targetLen);
130 if (gnix_app_targetIps)
131 free(gnix_app_targetIps);
132 if (gnix_app_startPe)
133 free(gnix_app_startPe);
134 if (gnix_app_totalPes)
135 free(gnix_app_totalPes);
136 if (gnix_app_nodePes)
137 free(gnix_app_nodePes);
138 if (gnix_app_peCpus)
139 free(gnix_app_peCpus);
140
141 alps_init = false;
142
143 alps_app_lli_unlock();
144 }
145
_gnix_app_cleanup(void)146 void _gnix_app_cleanup(void)
147 {
148 if (alps_init) {
149 __gnix_alps_cleanup();
150 } else if (ccm_init) {
151 __gnix_ccm_cleanup();
152 }
153 }
154
155 /* There are two types of errors that can happen in this function:
156 * - CCM ALPS info file not found
157 * - Failure while trying to get ptag, cookie and PEs/node
158 * Currently we don't distinguish between the two.
159 */
__gnix_ccm_init(void)160 static int __gnix_ccm_init(void)
161 {
162 int rc, fd;
163 FILE *f;
164 char *nodefile;
165 char nodelist[PATH_MAX];
166 const char *home;
167 ccm_alps_info_t info;
168 uint32_t num_nids = 0;
169
170 GNIX_DEBUG(FI_LOG_FABRIC, "Reading job info file %s\n",
171 CCM_ALPS_INFO_FILE);
172
173 fd = open(CCM_ALPS_INFO_FILE, O_RDONLY);
174 if (fd < 0) {
175 return -FI_EIO;
176 }
177
178 rc = read(fd, &info, sizeof(ccm_alps_info_t));
179 if (rc != sizeof(ccm_alps_info_t))
180 return -FI_EIO;
181
182 gnix_app_ptag = info.ptag;
183 gnix_app_cookie = info.cookie;
184
185 close(fd);
186 GNIX_DEBUG(FI_LOG_FABRIC, "Ptag=0x%x, cookie=0x%x\n",
187 gnix_app_ptag, gnix_app_cookie);
188
189 home = getenv("HOME");
190 /* use the WLM node file if using PBS */
191 nodefile = getenv("PBS_NODEFILE");
192 if (!nodefile) {
193 const char *jobid = getenv("SLURM_JOB_ID");
194 if (!jobid) {
195 jobid = getenv("SLURM_JOBID");
196 }
197 snprintf(nodelist, PATH_MAX, "%s/%s%s", home ? home : ".",
198 CCM_NODELIST_FN, jobid ? jobid : "sdb");
199 nodefile = nodelist;
200 }
201 f = fopen(nodefile, "r");
202 if (f) {
203 char mynid[PATH_MAX];
204 char next_nid[PATH_MAX];
205
206 rc = gethostname(mynid, PATH_MAX);
207 if (rc) {
208 /* use the first address */
209 rc = fscanf(f, "%s\n", mynid);
210 /* assume this one worked, error case is same */
211 num_nids++;
212 }
213 while (true) {
214 rc = fscanf(f, "%s\n", next_nid);
215 if (rc == 1) {
216 if (strcmp(mynid, next_nid) == 0) {
217 num_nids++;
218 }
219 } else {
220 break;
221 }
222 }
223 gnix_pes_on_node = num_nids;
224 fclose(f);
225 } else {
226 /* what would be a better default? */
227 GNIX_WARN(FI_LOG_FABRIC,
228 "CCM nodelist not found. Assuming 1 PE per node\n");
229 gnix_pes_on_node = 1;
230 }
231 GNIX_DEBUG(FI_LOG_FABRIC, "pes per node=%u\n", gnix_pes_on_node);
232
233 /* Don't really need to do this here, but wanted to be clear */
234 gnix_app_placementList = NULL;
235 gnix_app_targetNids = NULL;
236 gnix_app_targetPes = NULL;
237 gnix_app_targetLen = NULL;
238 gnix_app_targetIps = NULL;
239 gnix_app_startPe = NULL;
240 gnix_app_totalPes = NULL;
241 gnix_app_nodePes = NULL;
242 gnix_app_peCpus = NULL;
243
244 ccm_init = true;
245 return FI_SUCCESS;
246 }
247
__gnix_alps_init(void)248 static int __gnix_alps_init(void)
249 {
250 char *cptr = NULL;
251 int ret = FI_SUCCESS;
252 int my_pe = -1;
253 int alps_status = 0;
254 size_t alps_count;
255 alpsAppLLIGni_t *rdmacred_rsp = NULL;
256 alpsAppGni_t *rdmacred_buf = NULL;
257
258 fastlock_acquire(&__gnix_alps_lock);
259 /* lli_lock doesn't return anything useful */
260 ret = alps_app_lli_lock();
261
262 if (alps_init) {
263 /* alps lli lock protects alps_init for now */
264 alps_app_lli_unlock();
265 fastlock_release(&__gnix_alps_lock);
266 return ret;
267 }
268
269 /*
270 * First get our apid
271 */
272 ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
273 if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
274 GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n", ret,
275 strerror(errno));
276 ret = -FI_EIO;
277 goto err;
278 }
279
280 ret = alps_app_lli_get_response(&alps_status, &alps_count);
281 if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) {
282 GNIX_WARN(FI_LOG_FABRIC, "lli get response failed, "
283 "alps_status=%d(%s)\n", alps_status,
284 strerror(errno));
285 ret = -FI_EIO;
286 goto err;
287 }
288
289 ret = alps_app_lli_get_response_bytes(&gnix_apid, sizeof(gnix_apid));
290 if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
291 GNIX_WARN(FI_LOG_FABRIC,
292 "lli get response failed, ret=%d(%s)\n",
293 ret, strerror(errno));
294 ret = -FI_EIO;
295 goto err;
296 }
297
298 /*
299 * now get the GNI rdma credentials info
300 */
301 ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0);
302 if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
303 GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n",
304 ret, strerror(errno));
305 ret = -FI_EIO;
306 goto err;
307 }
308
309 ret = alps_app_lli_get_response(&alps_status, &alps_count);
310 if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) {
311 GNIX_WARN(FI_LOG_FABRIC,
312 "lli get response failed, alps_status=%d(%s)\n",
313 alps_status, strerror(errno));
314 ret = -FI_EIO;
315 goto err;
316 }
317
318 rdmacred_rsp = malloc(alps_count);
319 if (rdmacred_rsp == NULL) {
320 ret = -FI_ENOMEM;
321 goto err;
322 }
323
324 memset(rdmacred_rsp, 0, alps_count);
325
326 ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count);
327 if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
328 GNIX_WARN(FI_LOG_FABRIC,
329 "lli get response failed, ret=%d(%s)\n",
330 ret, strerror(errno));
331 ret = -FI_EIO;
332 goto err;
333 }
334
335 rdmacred_buf = (alpsAppGni_t *) rdmacred_rsp->u.buf;
336
337 /*
338 * just use the first ptag/cookie for now
339 */
340
341 gnix_app_ptag = rdmacred_buf[0].ptag;
342 gnix_app_cookie = rdmacred_buf[0].cookie;
343
344 /*
345 * alps_get_placement_info(uint64_t apid, alpsAppLayout_t *appLayout,
346 * int **placementList, int **targetNids, int **targetPes,
347 * int **targetLen, struct in_addr **targetIps, int **startPe,
348 * int **totalPes, int **nodePes, int **peCpus);
349 */
350 ret = alps_get_placement_info(gnix_apid, &gnix_appLayout,
351 &gnix_app_placementList,
352 &gnix_app_targetNids,
353 &gnix_app_targetPes,
354 &gnix_app_targetLen,
355 &gnix_app_targetIps,
356 &gnix_app_startPe,
357 &gnix_app_totalPes,
358 &gnix_app_nodePes,
359 &gnix_app_peCpus);
360 if (ret != 1) {
361 GNIX_WARN(FI_LOG_FABRIC,
362 "alps_get_placement_info failed, ret=%d(%s)\n",
363 ret, strerror(errno));
364 ret = -FI_EIO;
365 goto err;
366 }
367
368 gnix_pes_on_node = gnix_appLayout.numPesHere;
369 gnix_first_pe_on_node = gnix_appLayout.firstPe;
370
371 if ((cptr = getenv("PMI_FORK_RANK")) != NULL) {
372 my_pe = atoi(cptr);
373 } else {
374 if ((cptr = getenv("ALPS_APP_PE")) != NULL) {
375 my_pe = atoi(cptr);
376 }
377 }
378
379 /*
380 * compute local pe rank, assuming we got our global PE rank
381 * via either an ALPS (or ALPS SLURM plugin) or Cray PMI,
382 * otherwise set to -1.
383 */
384 if (my_pe != -1)
385 gnix_pe_node_rank = my_pe - gnix_first_pe_on_node;
386
387 alps_init = true;
388
389 ret = 0;
390 err:
391 alps_app_lli_unlock();
392 fastlock_release(&__gnix_alps_lock);
393 if (rdmacred_rsp != NULL) {
394 free(rdmacred_rsp);
395 }
396
397 return ret;
398 }
399
__gnix_app_init(void)400 static int __gnix_app_init(void)
401 {
402 int ret;
403
404 if (app_init) {
405 return FI_SUCCESS;
406 }
407
408 /* Try CCM first */
409 ret = __gnix_ccm_init();
410 if (ret) {
411 ret = __gnix_alps_init();
412 }
413
414 if (ret == FI_SUCCESS) {
415 app_init = true;
416 }
417
418 gnix_device_id = 0;
419 return ret;
420
421 }
422
/* Return the job's GNI RDMA credentials (ptag and cookie), initializing
 * the app layer on first use.  'addr' is currently unused. */
int gnixu_get_rdma_credentials(void *addr, uint8_t *ptag, uint32_t *cookie)
{
	int rc;

	/*TODO: If addr is used, ensure that ep->info->addr_format is checked*/

	if (!ptag || !cookie)
		return -FI_EINVAL;

	rc = __gnix_app_init();
	if (rc) {
		GNIX_WARN(FI_LOG_FABRIC,
			  "__gnix_app_init() failed, ret=%d(%s)\n",
			  rc, strerror(errno));
		return rc;
	}

	/*
	 * TODO: need to handle non null addr differently at some point,
	 * a non-NULL addr can be used to acquire RDMA credentials other than
	 * those assigned by ALPS/nativized slurm.
	 */
	*ptag = gnix_app_ptag;
	*cookie = gnix_app_cookie;

	return rc;
}
451
452
453 #define NUM_GNI_RC (GNI_RC_ERROR_NOMEM+1)
454 static int gnix_rc_table[NUM_GNI_RC] = {
455 [GNI_RC_SUCCESS] = FI_SUCCESS,
456 [GNI_RC_NOT_DONE] = -FI_EAGAIN,
457 [GNI_RC_INVALID_PARAM] = -FI_EINVAL,
458 [GNI_RC_ERROR_RESOURCE] = -FI_EBUSY,
459 [GNI_RC_TIMEOUT] = -FI_ETIMEDOUT,
460 [GNI_RC_PERMISSION_ERROR] = -FI_EACCES,
461 [GNI_RC_DESCRIPTOR_ERROR] = -FI_EOTHER,
462 [GNI_RC_ALIGNMENT_ERROR] = -FI_EINVAL,
463 [GNI_RC_INVALID_STATE] = -FI_EOPBADSTATE,
464 [GNI_RC_NO_MATCH] = -FI_EINVAL,
465 [GNI_RC_SIZE_ERROR] = -FI_ETOOSMALL,
466 [GNI_RC_TRANSACTION_ERROR] = -FI_ECANCELED,
467 [GNI_RC_ILLEGAL_OP] = -FI_EOPNOTSUPP,
468 [GNI_RC_ERROR_NOMEM] = -FI_ENOMEM
469 };
470
gnixu_to_fi_errno(int err)471 int gnixu_to_fi_errno(int err)
472 {
473 if (err >= 0 && err < NUM_GNI_RC)
474 return gnix_rc_table[err];
475 else
476 return -FI_EOTHER;
477 }
478
479 /* Indicate that the next task spawned will be restricted to cores assigned to
480 * corespec. */
_gnix_task_is_not_app(void)481 int _gnix_task_is_not_app(void)
482 {
483 size_t count;
484 int fd;
485 char filename[PATH_MAX];
486 int rc = 0;
487 char val_str[] = "0";
488 int val_str_len = strlen(val_str);
489
490 snprintf(filename, PATH_MAX, "/proc/self/task/%ld/task_is_app",
491 syscall(SYS_gettid));
492 fd = open(filename, O_WRONLY);
493 if (fd < 0) {
494 GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n",
495 filename, strerror(errno));
496 return -errno;
497 }
498
499 count = write(fd, val_str, val_str_len);
500 if (count != val_str_len) {
501 GNIX_WARN(FI_LOG_FABRIC, "write(%s, %s) failed, errno=%s\n",
502 filename, val_str, strerror(errno));
503 rc = -errno;
504 }
505 close(fd);
506
507 return rc;
508 }
509
gnix_write_proc_job(char * val_str)510 static int gnix_write_proc_job(char *val_str)
511 {
512 size_t count;
513 int fd;
514 int rc = 0;
515 char *filename = "/proc/job";
516 int val_str_len = strlen(val_str);
517
518 fd = open(filename, O_WRONLY);
519 if (fd < 0) {
520 GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n",
521 filename, strerror(errno));
522 return -errno;
523 }
524
525 count = write(fd, val_str, val_str_len);
526 if (count != val_str_len) {
527 GNIX_WARN(FI_LOG_FABRIC, "write(%s) failed, errno=%s\n",
528 val_str, strerror(errno));
529 rc = -errno;
530 }
531 close(fd);
532
533 return rc;
534 }
535
/* Indicate that the next task spawned will be restricted to CPUs that are not
 * assigned to the app and not assigned to corespec. */
int _gnix_job_enable_unassigned_cpus(void)
{
	char cmd[] = "enable_affinity_unassigned_cpus";

	return gnix_write_proc_job(cmd);
}
542
/* Indicate that the next task spawned will be restricted to CPUs that are
 * assigned to the app. */
int _gnix_job_disable_unassigned_cpus(void)
{
	char cmd[] = "disable_affinity_unassigned_cpus";

	return gnix_write_proc_job(cmd);
}
549
/* Indicate that the next task spawned should adhere to the affinity rules. */
int _gnix_job_enable_affinity_apply(void)
{
	char cmd[] = "enable_affinity_apply";

	return gnix_write_proc_job(cmd);
}
555
/* Indicate that the next task spawned should avoid the affinity rules and be
 * allowed to run anywhere in the app cpuset. */
int _gnix_job_disable_affinity_apply(void)
{
	char cmd[] = "disable_affinity_apply";

	return gnix_write_proc_job(cmd);
}
562
563
_gnix_job_fma_limit(uint32_t dev_id,uint8_t ptag,uint32_t * limit)564 int _gnix_job_fma_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit)
565 {
566 gni_return_t status;
567 gni_job_res_desc_t job_res_desc;
568
569 if (!limit) {
570 return -FI_EINVAL;
571 }
572
573 status = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_FMA, &job_res_desc);
574 if (status) {
575 GNIX_WARN(FI_LOG_FABRIC,
576 "GNI_GetJobResInfo(%d, %d) failed, status=%s\n",
577 dev_id, ptag, gni_err_str[status]);
578 return -FI_EINVAL;
579 }
580
581 *limit = job_res_desc.limit;
582 GNIX_INFO(FI_LOG_FABRIC, "fma_limit: %u\n", job_res_desc.limit);
583
584 return FI_SUCCESS;
585 }
586
/* Query the per-job CQ resource limit for the given device and ptag.
 * On success stores the limit through 'limit' and returns FI_SUCCESS;
 * returns -FI_EINVAL on bad arguments or GNI query failure. */
int _gnix_job_cq_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit)
{
	gni_job_res_desc_t res;
	gni_return_t rc;

	if (limit == NULL)
		return -FI_EINVAL;

	rc = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_CQ, &res);
	if (rc) {
		GNIX_WARN(FI_LOG_FABRIC,
			  "GNI_GetJobResInfo(%d, %d) failed, status=%s\n",
			  dev_id, ptag, gni_err_str[rc]);
		return -FI_EINVAL;
	}

	GNIX_INFO(FI_LOG_FABRIC, "cq_limit: %u\n", res.limit);
	*limit = res.limit;

	return FI_SUCCESS;
}
609
/* Report how many PEs are running on this node, initializing the app
 * layer on first use.  Returns FI_SUCCESS or a -FI_* error. */
int _gnix_pes_on_node(uint32_t *num_pes)
{
	int rc;

	if (!num_pes) {
		return -FI_EINVAL;
	}

	rc = __gnix_app_init();
	if (rc) {
		GNIX_WARN(FI_LOG_FABRIC,
			  "__gnix_app_init() failed, ret=%d(%s)\n",
			  rc, strerror(errno));
		return rc;
	}

	*num_pes = gnix_pes_on_node;
	/* BUG FIX: log gnix_pes_on_node, not gnix_appLayout.numPesHere --
	 * the layout struct is only populated on the ALPS path and is
	 * zero/stale when the job was initialized via CCM. */
	GNIX_INFO(FI_LOG_FABRIC, "num_pes: %u\n", gnix_pes_on_node);

	return FI_SUCCESS;
}
631
_gnix_pe_node_rank(int * pe_node_rank)632 int _gnix_pe_node_rank(int *pe_node_rank)
633 {
634 int rc;
635
636 if (!pe_node_rank) {
637 return -FI_EINVAL;
638 }
639
640 rc = __gnix_app_init();
641 if (rc) {
642 GNIX_WARN(FI_LOG_FABRIC,
643 "__gnix_app_init() failed, ret=%d(%s)\n",
644 rc, strerror(errno));
645 return rc;
646 }
647
648 if (gnix_pe_node_rank != -1) {
649 *pe_node_rank = gnix_pe_node_rank;
650 rc = FI_SUCCESS;
651 } else
652 rc = -FI_EADDRNOTAVAIL;
653
654 GNIX_INFO(FI_LOG_FABRIC, "pe_node_rank: %u\n", gnix_pe_node_rank);
655
656 return rc;
657 }
658
/* Compute how many NICs each rank may open, by dividing the scarcer of
 * the job's FMA and (per-endpoint) CQ limits by the number of PEs on
 * the node.  Also caches the raw CQ limit for _gnix_get_cq_limit(). */
int _gnix_nics_per_rank(uint32_t *nics_per_rank)
{
	int ret;
	uint32_t pes_here, fma_avail, cq_avail, scarcest;

	if (!nics_per_rank) {
		return -FI_EINVAL;
	}

	ret = __gnix_app_init();
	if (ret) {
		GNIX_WARN(FI_LOG_FABRIC,
			  "__gnix_app_init() failed, ret=%d(%s)\n",
			  ret, strerror(errno));
		return ret;
	}

	ret = _gnix_job_fma_limit(gnix_device_id, gnix_app_ptag, &fma_avail);
	if (ret) {
		return ret;
	}

	ret = _gnix_job_cq_limit(gnix_device_id, gnix_app_ptag, &cq_avail);
	if (ret) {
		return ret;
	}

	/* remember the raw CQ limit before scaling it per endpoint */
	gnix_cq_limit = cq_avail;
	cq_avail /= GNIX_CQS_PER_EP;

	ret = _gnix_pes_on_node(&pes_here);
	if (ret) {
		return ret;
	}

	scarcest = (fma_avail > cq_avail) ? cq_avail : fma_avail;

	*nics_per_rank = scarcest / pes_here;

	return FI_SUCCESS;
}
700
/* Safely append formatted text to buf at offset 'written'.  snprintf
 * returns the length it *would* have written, so on truncation the
 * naive 'size - written' goes negative and, converted to size_t, lets
 * later calls write past the buffer.  This helper clamps the offset. */
static int __gnix_buf_cat(char *buf, int size, int written,
			  const char *fmt, ...)
{
	va_list ap;
	int rc;

	if (written >= size)
		return written;	/* buffer already full; drop the text */

	va_start(ap, fmt);
	rc = vsnprintf(buf + written, size - written, fmt, ap);
	va_end(ap);

	if (rc < 0)
		return written;	/* encoding error; keep what we have */

	written += rc;
	if (written > size)
		written = size;	/* output was truncated */

	return written;
}

/* Dump the GNI device and per-ptag job resource counters to the warning
 * log.  No-op when FI_LOG_WARN/FI_LOG_FABRIC logging is disabled. */
void _gnix_dump_gni_res(uint8_t ptag)
{
	int i;
	gni_return_t status;
	gni_dev_res_desc_t dev_res_desc;
	gni_job_res_desc_t job_res_desc;
#define BUF_SZ 4096
	char buf[BUF_SZ];
	int size = BUF_SZ, written = 0;

	if (!fi_log_enabled(&gnix_prov, FI_LOG_WARN, FI_LOG_FABRIC))
		return;

	written = __gnix_buf_cat(buf, size, written, "Device Resources:\n");
	for (i = GNI_DEV_RES_FIRST+1; i < GNI_DEV_RES_LAST; i++) {
		status = GNI_GetDevResInfo(0, i, &dev_res_desc);
		if (status == GNI_RC_SUCCESS) {
			written = __gnix_buf_cat(buf, size, written,
				"dev res: %9s, avail: %lu res: %lu held: %lu total: %lu\n",
				gni_dev_res_to_str(i),
				dev_res_desc.available,
				dev_res_desc.reserved,
				dev_res_desc.held,
				dev_res_desc.total);
		}
	}

	GNIX_WARN(FI_LOG_FABRIC, "%s", buf);

	written = 0;
	written = __gnix_buf_cat(buf, size, written, "Job Resources:\n");
	for (i = GNI_JOB_RES_FIRST+1; i < GNI_JOB_RES_LAST; i++) {
		status = GNI_GetJobResInfo(0, ptag, i, &job_res_desc);
		if (status == GNI_RC_SUCCESS) {
			written = __gnix_buf_cat(buf, size, written,
				"ptag[%d] job res: %9s used: %lu limit: %lu\n",
				ptag, gni_job_res_to_str(i),
				job_res_desc.used,
				job_res_desc.limit);
		}
	}

	GNIX_WARN(FI_LOG_FABRIC, "%s", buf);
}
747
/* Parse /proc/job for the number of corespec (system-reserved) CPUs.
 * The result is cached in function-static storage, so the file is only
 * read once per process (note: the cache is not thread-safe).
 *
 * Returns FI_SUCCESS when a corespec line was found, -FI_ENODATA when
 * none was present, -FI_EINVAL on a NULL argument, or -errno when
 * /proc/job cannot be opened. */
int _gnix_get_num_corespec_cpus(uint32_t *num_core_spec_cpus)
{
	int ret = -FI_ENODATA;
	int ncpus = 0;
	FILE *fd = NULL;
	char buffer[4096], *line, *field;
	static bool already_called;
	static uint32_t cached_num_corespec_cpus;

	if (num_core_spec_cpus == NULL)
		return -FI_EINVAL;

	if (already_called == true) {
		*num_core_spec_cpus = cached_num_corespec_cpus;
		return FI_SUCCESS;
	}

	fd = fopen("/proc/job", "r");
	if (!fd) {
		/* save errno before logging, which may clobber it */
		ret = -errno;
		GNIX_WARN(FI_LOG_FABRIC,
			  "open of /proc/job returned %s", strerror(errno));
		return ret;
	}

	while (1) {
		line = fgets(buffer, sizeof(buffer), fd);
		if (!line)
			break;

		/* expected form: "... corespec num_sys_cpus <n> ..." */
		line = strstr(line, "corespec");
		if (line != NULL) {
			field = strtok(line, " ");
			field = strtok(NULL, " ");
			/* BUG FIX: strtok returns NULL when the line has no
			 * further tokens; guard before strcmp/atoi to avoid
			 * a NULL dereference on malformed input. */
			if (field && !strcmp(field, "num_sys_cpus")) {
				field = strtok(NULL, " ");
				if (field)
					ncpus = atoi(field);
			}
			ret = FI_SUCCESS;
			break;
		}
	}

	*num_core_spec_cpus = ncpus;
	cached_num_corespec_cpus = ncpus;

	already_called = true;

	fclose(fd);

	return ret;
}
799
800