1 /*
2  * Copyright (c) 2014 Intel Corporation, Inc.  All rights reserved.
3  * Copyright (c) 2015-2017 Los Alamos National Security, LLC.
4  *                         All rights reserved.
5  * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #if HAVE_CONFIG_H
37 #include <config.h>
38 #endif /* HAVE_CONFIG_H */
39 
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <netdb.h>
43 #include <netinet/in.h>
44 #include <netinet/tcp.h>
45 #include <poll.h>
46 #include <stdarg.h>
47 #include <stddef.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <sys/select.h>
51 #include <sys/socket.h>
52 #include <sys/types.h>
53 #include <sys/time.h>
54 #include <unistd.h>
55 #include <stdlib.h>
56 #include <linux/limits.h>
57 #include <sys/syscall.h>
58 
59 #include "alps/alps.h"
60 #include "alps/alps_toolAssist.h"
61 #include "alps/libalpsutil.h"
62 #include "alps/libalpslli.h"
63 
64 #include "gnix.h"
65 #include "gnix_util.h"
66 
67 static bool app_init;
68 /* Filled in by __gnix_app_init */
69 static uint8_t gnix_app_ptag;
70 static uint32_t gnix_app_cookie;
71 static uint32_t gnix_pes_on_node;
72 static int gnix_pe_node_rank = -1;
73 #if HAVE_CRITERION
74 int gnix_first_pe_on_node; /* globally visible for  criterion */
75 #else
76 static int gnix_first_pe_on_node;
77 #endif
78 /* CCM/ccmlogin specific stuff */
79 static bool ccm_init;
80 /* This file provides ccm_alps_info */
81 #define CCM_ALPS_INFO_FILE "/tmp/ccm_alps_info"
82 typedef struct ccm_alps_info {
83 	uint32_t version;
84 	uint8_t ptag;
85 	uint32_t cookie;
86 } ccm_alps_info_t;
87 /* Format for the nodelist filename: $HOME/.crayccm/ccmnodlist.<WLM jobid> */
88 #define CCM_NODELIST_FN ".crayccm/ccm_nodelist."
89 /* alps specific stuff */
90 static uint64_t gnix_apid;
91 static alpsAppLayout_t gnix_appLayout;
92 static uint32_t gnix_device_id;
93 static int gnix_cq_limit;
94 /* These are not used currently and could be static to gnix_alps_init */
95 static int alps_init;
96 static int *gnix_app_placementList;
97 static int *gnix_app_targetNids;
98 static int *gnix_app_targetPes;
99 static int *gnix_app_targetLen;
100 static struct in_addr *gnix_app_targetIps;
101 static int *gnix_app_startPe;
102 static int *gnix_app_totalPes;
103 static int *gnix_app_nodePes;
104 static int *gnix_app_peCpus;
105 
106 fastlock_t __gnix_alps_lock;
107 
_gnix_get_cq_limit(void)108 int _gnix_get_cq_limit(void)
109 {
110 	return gnix_cq_limit;
111 }
112 
__gnix_ccm_cleanup(void)113 static inline void __gnix_ccm_cleanup(void)
114 {
115 	ccm_init = false;
116 }
117 
__gnix_alps_cleanup(void)118 static inline void __gnix_alps_cleanup(void)
119 {
120 	alps_app_lli_lock();
121 
122 	if (gnix_app_placementList)
123 		free(gnix_app_placementList);
124 	if (gnix_app_targetNids)
125 		free(gnix_app_targetNids);
126 	if (gnix_app_targetPes)
127 		free(gnix_app_targetPes);
128 	if (gnix_app_targetLen)
129 		free(gnix_app_targetLen);
130 	if (gnix_app_targetIps)
131 		free(gnix_app_targetIps);
132 	if (gnix_app_startPe)
133 		free(gnix_app_startPe);
134 	if (gnix_app_totalPes)
135 		free(gnix_app_totalPes);
136 	if (gnix_app_nodePes)
137 		free(gnix_app_nodePes);
138 	if (gnix_app_peCpus)
139 		free(gnix_app_peCpus);
140 
141 	alps_init = false;
142 
143 	alps_app_lli_unlock();
144 }
145 
_gnix_app_cleanup(void)146 void _gnix_app_cleanup(void)
147 {
148 	if (alps_init) {
149 		__gnix_alps_cleanup();
150 	} else if (ccm_init) {
151 		__gnix_ccm_cleanup();
152 	}
153 }
154 
155 /* There are two types of errors that can happen in this function:
156  * - CCM ALPS info file not found
157  * - Failure while trying to get ptag, cookie and PEs/node
158  *  Currently we don't distinguish between the two.
159  */
__gnix_ccm_init(void)160 static int __gnix_ccm_init(void)
161 {
162 	int rc, fd;
163 	FILE *f;
164 	char *nodefile;
165 	char nodelist[PATH_MAX];
166 	const char *home;
167 	ccm_alps_info_t info;
168 	uint32_t num_nids = 0;
169 
170 	GNIX_DEBUG(FI_LOG_FABRIC, "Reading job info file %s\n",
171 		   CCM_ALPS_INFO_FILE);
172 
173 	fd = open(CCM_ALPS_INFO_FILE, O_RDONLY);
174 	if (fd < 0) {
175 		return -FI_EIO;
176 	}
177 
178 	rc = read(fd, &info, sizeof(ccm_alps_info_t));
179 	if (rc != sizeof(ccm_alps_info_t))
180 		return -FI_EIO;
181 
182 	gnix_app_ptag = info.ptag;
183 	gnix_app_cookie = info.cookie;
184 
185 	close(fd);
186 	GNIX_DEBUG(FI_LOG_FABRIC, "Ptag=0x%x, cookie=0x%x\n",
187 		   gnix_app_ptag, gnix_app_cookie);
188 
189 	home = getenv("HOME");
190 	/* use the WLM node file if using PBS */
191 	nodefile = getenv("PBS_NODEFILE");
192 	if (!nodefile) {
193 		const char *jobid = getenv("SLURM_JOB_ID");
194 		if (!jobid) {
195 			jobid = getenv("SLURM_JOBID");
196 		}
197 		snprintf(nodelist, PATH_MAX, "%s/%s%s", home ? home : ".",
198 			 CCM_NODELIST_FN, jobid ? jobid : "sdb");
199 		nodefile = nodelist;
200 	}
201 	f = fopen(nodefile, "r");
202 	if (f) {
203 		char mynid[PATH_MAX];
204 		char next_nid[PATH_MAX];
205 
206 		rc = gethostname(mynid, PATH_MAX);
207 		if (rc) {
208 			/* use the first address */
209 			rc = fscanf(f, "%s\n", mynid);
210 			/* assume this one worked, error case is same */
211 			num_nids++;
212 		}
213 		while (true) {
214 			rc = fscanf(f, "%s\n", next_nid);
215 			if (rc == 1) {
216 				if (strcmp(mynid, next_nid) == 0) {
217 					num_nids++;
218 				}
219 			} else {
220 				break;
221 			}
222 		}
223 		gnix_pes_on_node = num_nids;
224 		fclose(f);
225 	} else {
226 		/* what would be a better default? */
227 		GNIX_WARN(FI_LOG_FABRIC,
228 			  "CCM nodelist not found.  Assuming 1 PE per node\n");
229 		gnix_pes_on_node = 1;
230 	}
231 	GNIX_DEBUG(FI_LOG_FABRIC, "pes per node=%u\n", gnix_pes_on_node);
232 
233 	/* Don't really need to do this here, but wanted to be clear */
234 	gnix_app_placementList = NULL;
235 	gnix_app_targetNids = NULL;
236 	gnix_app_targetPes = NULL;
237 	gnix_app_targetLen = NULL;
238 	gnix_app_targetIps = NULL;
239 	gnix_app_startPe = NULL;
240 	gnix_app_totalPes = NULL;
241 	gnix_app_nodePes = NULL;
242 	gnix_app_peCpus = NULL;
243 
244 	ccm_init = true;
245 	return FI_SUCCESS;
246 }
247 
__gnix_alps_init(void)248 static int __gnix_alps_init(void)
249 {
250 	char *cptr = NULL;
251 	int ret = FI_SUCCESS;
252 	int my_pe = -1;
253 	int alps_status = 0;
254 	size_t alps_count;
255 	alpsAppLLIGni_t *rdmacred_rsp = NULL;
256 	alpsAppGni_t *rdmacred_buf = NULL;
257 
258 	fastlock_acquire(&__gnix_alps_lock);
259 	/* lli_lock doesn't return anything useful */
260 	ret = alps_app_lli_lock();
261 
262 	if (alps_init) {
263 		/* alps lli lock protects alps_init for now */
264 		alps_app_lli_unlock();
265 		fastlock_release(&__gnix_alps_lock);
266 		return ret;
267 	}
268 
269 	/*
270 	 * First get our apid
271 	 */
272 	ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
273 	if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
274 		GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n", ret,
275 			  strerror(errno));
276 		ret = -FI_EIO;
277 		goto err;
278 	}
279 
280 	ret = alps_app_lli_get_response(&alps_status, &alps_count);
281 	if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) {
282 		GNIX_WARN(FI_LOG_FABRIC, "lli get response failed, "
283 			  "alps_status=%d(%s)\n", alps_status,
284 			  strerror(errno));
285 		ret = -FI_EIO;
286 		goto err;
287 	}
288 
289 	ret = alps_app_lli_get_response_bytes(&gnix_apid, sizeof(gnix_apid));
290 	if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
291 		GNIX_WARN(FI_LOG_FABRIC,
292 			  "lli get response failed, ret=%d(%s)\n",
293 			  ret, strerror(errno));
294 		ret = -FI_EIO;
295 		goto err;
296 	}
297 
298 	/*
299 	 * now get the GNI rdma credentials info
300 	 */
301 	ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0);
302 	if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
303 		GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n",
304 			  ret, strerror(errno));
305 		ret = -FI_EIO;
306 		goto err;
307 	}
308 
309 	ret = alps_app_lli_get_response(&alps_status, &alps_count);
310 	if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) {
311 		GNIX_WARN(FI_LOG_FABRIC,
312 			  "lli get response failed, alps_status=%d(%s)\n",
313 			  alps_status, strerror(errno));
314 		ret = -FI_EIO;
315 		goto err;
316 	}
317 
318 	rdmacred_rsp = malloc(alps_count);
319 	if (rdmacred_rsp == NULL) {
320 		ret = -FI_ENOMEM;
321 		goto err;
322 	}
323 
324 	memset(rdmacred_rsp, 0, alps_count);
325 
326 	ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count);
327 	if (ret != ALPS_APP_LLI_ALPS_STAT_OK) {
328 		GNIX_WARN(FI_LOG_FABRIC,
329 			  "lli get response failed, ret=%d(%s)\n",
330 			  ret, strerror(errno));
331 		ret = -FI_EIO;
332 		goto err;
333 	}
334 
335 	rdmacred_buf = (alpsAppGni_t *) rdmacred_rsp->u.buf;
336 
337 	/*
338 	 * just use the first ptag/cookie for now
339 	 */
340 
341 	gnix_app_ptag = rdmacred_buf[0].ptag;
342 	gnix_app_cookie = rdmacred_buf[0].cookie;
343 
344 	/*
345 	 * alps_get_placement_info(uint64_t apid, alpsAppLayout_t *appLayout,
346 	 *	int **placementList, int **targetNids, int **targetPes,
347 	 *	int **targetLen, struct in_addr **targetIps, int **startPe,
348 	 *	int **totalPes, int **nodePes, int **peCpus);
349 	 */
350 	ret = alps_get_placement_info(gnix_apid, &gnix_appLayout,
351 				      &gnix_app_placementList,
352 				      &gnix_app_targetNids,
353 				      &gnix_app_targetPes,
354 				      &gnix_app_targetLen,
355 				      &gnix_app_targetIps,
356 				      &gnix_app_startPe,
357 				      &gnix_app_totalPes,
358 				      &gnix_app_nodePes,
359 				      &gnix_app_peCpus);
360 	if (ret != 1) {
361 		GNIX_WARN(FI_LOG_FABRIC,
362 			  "alps_get_placement_info failed, ret=%d(%s)\n",
363 			  ret, strerror(errno));
364 		ret = -FI_EIO;
365 		goto err;
366 	}
367 
368 	gnix_pes_on_node = gnix_appLayout.numPesHere;
369 	gnix_first_pe_on_node = gnix_appLayout.firstPe;
370 
371 	if ((cptr = getenv("PMI_FORK_RANK")) != NULL) {
372 		my_pe = atoi(cptr);
373 	} else {
374 		if ((cptr = getenv("ALPS_APP_PE")) != NULL) {
375 			my_pe = atoi(cptr);
376 		}
377 	}
378 
379 	/*
380  	 * compute local pe rank, assuming we got our global PE rank
381  	 * via either an ALPS (or ALPS SLURM plugin) or Cray PMI,
382  	 * otherwise set to -1.
383  	 */
384 	if (my_pe != -1)
385 		gnix_pe_node_rank = my_pe - gnix_first_pe_on_node;
386 
387 	alps_init = true;
388 
389 	ret = 0;
390 err:
391 	alps_app_lli_unlock();
392 	fastlock_release(&__gnix_alps_lock);
393 	if (rdmacred_rsp != NULL) {
394 		free(rdmacred_rsp);
395 	}
396 
397 	return ret;
398 }
399 
__gnix_app_init(void)400 static int __gnix_app_init(void)
401 {
402 	int ret;
403 
404 	if (app_init) {
405 		return FI_SUCCESS;
406 	}
407 
408 	/* Try CCM first */
409 	ret = __gnix_ccm_init();
410 	if (ret) {
411 		ret = __gnix_alps_init();
412 	}
413 
414 	if (ret == FI_SUCCESS) {
415 		app_init = true;
416 	}
417 
418 	gnix_device_id = 0;
419 	return ret;
420 
421 }
422 
gnixu_get_rdma_credentials(void * addr,uint8_t * ptag,uint32_t * cookie)423 int gnixu_get_rdma_credentials(void *addr, uint8_t *ptag, uint32_t *cookie)
424 {
425 	int ret = FI_SUCCESS;
426 
427 	/*TODO: If addr is used, ensure that ep->info->addr_format is checked*/
428 
429 	if ((ptag == NULL) || (cookie == NULL)) {
430 		return -FI_EINVAL;
431 	}
432 
433 	ret = __gnix_app_init();
434 	if (ret) {
435 		GNIX_WARN(FI_LOG_FABRIC,
436 			  "__gnix_app_init() failed, ret=%d(%s)\n",
437 			  ret, strerror(errno));
438 		return ret;
439 	}
440 
441 	/*
442 	 * TODO: need to handle non null addr differently at some point,
443 	 * a non-NULL addr can be used to acquire RDMA credentials other than
444 	 * those assigned by ALPS/nativized slurm.
445 	 */
446 	*ptag = gnix_app_ptag;
447 	*cookie = gnix_app_cookie;
448 
449 	return ret;
450 }
451 
452 
453 #define NUM_GNI_RC (GNI_RC_ERROR_NOMEM+1)
454 static int gnix_rc_table[NUM_GNI_RC] = {
455 	[GNI_RC_SUCCESS] = FI_SUCCESS,
456 	[GNI_RC_NOT_DONE] = -FI_EAGAIN,
457 	[GNI_RC_INVALID_PARAM] = -FI_EINVAL,
458 	[GNI_RC_ERROR_RESOURCE] = -FI_EBUSY,
459 	[GNI_RC_TIMEOUT] = -FI_ETIMEDOUT,
460 	[GNI_RC_PERMISSION_ERROR] = -FI_EACCES,
461 	[GNI_RC_DESCRIPTOR_ERROR] = -FI_EOTHER,
462 	[GNI_RC_ALIGNMENT_ERROR] = -FI_EINVAL,
463 	[GNI_RC_INVALID_STATE] = -FI_EOPBADSTATE,
464 	[GNI_RC_NO_MATCH] = -FI_EINVAL,
465 	[GNI_RC_SIZE_ERROR] = -FI_ETOOSMALL,
466 	[GNI_RC_TRANSACTION_ERROR] = -FI_ECANCELED,
467 	[GNI_RC_ILLEGAL_OP] = -FI_EOPNOTSUPP,
468 	[GNI_RC_ERROR_NOMEM] = -FI_ENOMEM
469 };
470 
gnixu_to_fi_errno(int err)471 int gnixu_to_fi_errno(int err)
472 {
473 	if (err >= 0 && err < NUM_GNI_RC)
474 		return gnix_rc_table[err];
475 	else
476 		return -FI_EOTHER;
477 }
478 
479 /* Indicate that the next task spawned will be restricted to cores assigned to
480  * corespec. */
_gnix_task_is_not_app(void)481 int _gnix_task_is_not_app(void)
482 {
483 	size_t count;
484 	int fd;
485 	char filename[PATH_MAX];
486 	int rc = 0;
487 	char val_str[] = "0";
488 	int val_str_len = strlen(val_str);
489 
490 	snprintf(filename, PATH_MAX, "/proc/self/task/%ld/task_is_app",
491 		      syscall(SYS_gettid));
492 	fd = open(filename, O_WRONLY);
493 	if (fd < 0) {
494 		GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n",
495 			  filename, strerror(errno));
496 		return -errno;
497 	}
498 
499 	count = write(fd, val_str, val_str_len);
500 	if (count != val_str_len) {
501 		GNIX_WARN(FI_LOG_FABRIC, "write(%s, %s) failed, errno=%s\n",
502 			  filename, val_str, strerror(errno));
503 		rc = -errno;
504 	}
505 	close(fd);
506 
507 	return rc;
508 }
509 
gnix_write_proc_job(char * val_str)510 static int gnix_write_proc_job(char *val_str)
511 {
512 	size_t count;
513 	int fd;
514 	int rc = 0;
515 	char *filename = "/proc/job";
516 	int val_str_len = strlen(val_str);
517 
518 	fd = open(filename, O_WRONLY);
519 	if (fd < 0) {
520 		GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n",
521 			  filename, strerror(errno));
522 		return -errno;
523 	}
524 
525 	count = write(fd, val_str, val_str_len);
526 	if (count != val_str_len) {
527 		GNIX_WARN(FI_LOG_FABRIC, "write(%s) failed, errno=%s\n",
528 			  val_str, strerror(errno));
529 		rc = -errno;
530 	}
531 	close(fd);
532 
533 	return rc;
534 }
535 
/*
 * Ask for the next spawned task to be confined to the CPUs that belong
 * neither to the app nor to corespec.  Returns the result of writing the
 * command to /proc/job.
 */
int _gnix_job_enable_unassigned_cpus(void)
{
	char command[] = "enable_affinity_unassigned_cpus";

	return gnix_write_proc_job(command);
}
542 
/*
 * Ask for the next spawned task to be confined to the CPUs that are
 * assigned to the app.  Returns the result of writing the command to
 * /proc/job.
 */
int _gnix_job_disable_unassigned_cpus(void)
{
	char command[] = "disable_affinity_unassigned_cpus";

	return gnix_write_proc_job(command);
}
549 
/*
 * Ask for the next spawned task to follow the affinity rules.  Returns the
 * result of writing the command to /proc/job.
 */
int _gnix_job_enable_affinity_apply(void)
{
	char command[] = "enable_affinity_apply";

	return gnix_write_proc_job(command);
}
555 
/*
 * Ask for the next spawned task to bypass the affinity rules, so that it may
 * run anywhere in the app cpuset.  Returns the result of writing the command
 * to /proc/job.
 */
int _gnix_job_disable_affinity_apply(void)
{
	char command[] = "disable_affinity_apply";

	return gnix_write_proc_job(command);
}
562 
563 
_gnix_job_fma_limit(uint32_t dev_id,uint8_t ptag,uint32_t * limit)564 int _gnix_job_fma_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit)
565 {
566 	gni_return_t status;
567 	gni_job_res_desc_t job_res_desc;
568 
569 	if (!limit) {
570 		return -FI_EINVAL;
571 	}
572 
573 	status = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_FMA, &job_res_desc);
574 	if (status) {
575 		GNIX_WARN(FI_LOG_FABRIC,
576 			  "GNI_GetJobResInfo(%d, %d) failed, status=%s\n",
577 			  dev_id, ptag, gni_err_str[status]);
578 		return -FI_EINVAL;
579 	}
580 
581 	*limit = job_res_desc.limit;
582 	GNIX_INFO(FI_LOG_FABRIC, "fma_limit: %u\n", job_res_desc.limit);
583 
584 	return FI_SUCCESS;
585 }
586 
_gnix_job_cq_limit(uint32_t dev_id,uint8_t ptag,uint32_t * limit)587 int _gnix_job_cq_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit)
588 {
589 	gni_return_t status;
590 	gni_job_res_desc_t job_res_desc;
591 
592 	if (!limit) {
593 		return -FI_EINVAL;
594 	}
595 
596 	status = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_CQ, &job_res_desc);
597 	if (status) {
598 		GNIX_WARN(FI_LOG_FABRIC,
599 			  "GNI_GetJobResInfo(%d, %d) failed, status=%s\n",
600 			  dev_id, ptag, gni_err_str[status]);
601 		return -FI_EINVAL;
602 	}
603 
604 	*limit = job_res_desc.limit;
605 	GNIX_INFO(FI_LOG_FABRIC, "cq_limit: %u\n", job_res_desc.limit);
606 
607 	return FI_SUCCESS;
608 }
609 
_gnix_pes_on_node(uint32_t * num_pes)610 int _gnix_pes_on_node(uint32_t *num_pes)
611 {
612 	int rc;
613 
614 	if (!num_pes) {
615 		return -FI_EINVAL;
616 	}
617 
618 	rc = __gnix_app_init();
619 	if (rc) {
620 		GNIX_WARN(FI_LOG_FABRIC,
621 			  "__gnix_app_init() failed, ret=%d(%s)\n",
622 			  rc, strerror(errno));
623 		return rc;
624 	}
625 
626 	*num_pes = gnix_pes_on_node;
627 	GNIX_INFO(FI_LOG_FABRIC, "num_pes: %u\n", gnix_appLayout.numPesHere);
628 
629 	return FI_SUCCESS;
630 }
631 
_gnix_pe_node_rank(int * pe_node_rank)632 int _gnix_pe_node_rank(int *pe_node_rank)
633 {
634 	int rc;
635 
636 	if (!pe_node_rank) {
637 		return -FI_EINVAL;
638 	}
639 
640 	rc = __gnix_app_init();
641 	if (rc) {
642 		GNIX_WARN(FI_LOG_FABRIC,
643 			  "__gnix_app_init() failed, ret=%d(%s)\n",
644 			  rc, strerror(errno));
645 		return rc;
646 	}
647 
648 	if (gnix_pe_node_rank != -1) {
649 		*pe_node_rank = gnix_pe_node_rank;
650 		rc = FI_SUCCESS;
651 	} else
652 		rc = -FI_EADDRNOTAVAIL;
653 
654 	GNIX_INFO(FI_LOG_FABRIC, "pe_node_rank: %u\n", gnix_pe_node_rank);
655 
656 	return rc;
657 }
658 
_gnix_nics_per_rank(uint32_t * nics_per_rank)659 int _gnix_nics_per_rank(uint32_t *nics_per_rank)
660 {
661 	int rc;
662 	uint32_t npes, fmas, cqs, limiting_resource;
663 
664 	if (!nics_per_rank) {
665 		return -FI_EINVAL;
666 	}
667 
668 	rc = __gnix_app_init();
669 	if (rc) {
670 		GNIX_WARN(FI_LOG_FABRIC,
671 			  "__gnix_app_init() failed, ret=%d(%s)\n",
672 			  rc, strerror(errno));
673 		return rc;
674 	}
675 
676 	rc = _gnix_job_fma_limit(gnix_device_id, gnix_app_ptag, &fmas);
677 	if (rc) {
678 		return rc;
679 	}
680 
681 	rc = _gnix_job_cq_limit(gnix_device_id, gnix_app_ptag, &cqs);
682 	if (rc) {
683 		return rc;
684 	}
685 
686 	gnix_cq_limit = cqs;
687 	cqs /= GNIX_CQS_PER_EP;
688 
689 	rc = _gnix_pes_on_node(&npes);
690 	if (rc) {
691 		return rc;
692 	}
693 
694 	limiting_resource = fmas > cqs ? cqs : fmas;
695 
696 	*nics_per_rank = limiting_resource / npes;
697 
698 	return FI_SUCCESS;
699 }
700 
_gnix_dump_gni_res(uint8_t ptag)701 void _gnix_dump_gni_res(uint8_t ptag)
702 {
703 	int i;
704 	gni_return_t status;
705 	gni_dev_res_desc_t dev_res_desc;
706 	gni_job_res_desc_t job_res_desc;
707 #define BUF_SZ 4096
708 	char buf[BUF_SZ];
709 	int size = BUF_SZ, written = 0;
710 
711 	if (!fi_log_enabled(&gnix_prov, FI_LOG_WARN, FI_LOG_FABRIC))
712 		return;
713 
714 	written += snprintf(buf + written, size - written,
715 			    "Device Resources:\n");
716 	for (i = GNI_DEV_RES_FIRST+1; i < GNI_DEV_RES_LAST; i++) {
717 		status = GNI_GetDevResInfo(0, i, &dev_res_desc);
718 		if (status == GNI_RC_SUCCESS) {
719 			written += snprintf(buf + written, size - written,
720 					    "dev res: %9s, avail: %lu res: %lu held: %lu total: %lu\n",
721 					    gni_dev_res_to_str(i),
722 					    dev_res_desc.available,
723 					    dev_res_desc.reserved,
724 					    dev_res_desc.held,
725 					    dev_res_desc.total);
726 		}
727 	}
728 
729 	GNIX_WARN(FI_LOG_FABRIC, "%s", buf);
730 
731 	written = 0;
732 	written += snprintf(buf + written, size - written,
733 			    "Job Resources:\n");
734 	for (i = GNI_JOB_RES_FIRST+1; i < GNI_JOB_RES_LAST; i++) {
735 		status = GNI_GetJobResInfo(0, ptag, i, &job_res_desc);
736 		if (status == GNI_RC_SUCCESS) {
737 			written += snprintf(buf + written, size - written,
738 					    "ptag[%d] job res: %9s used: %lu limit: %lu\n",
739 					    ptag, gni_job_res_to_str(i),
740 					    job_res_desc.used,
741 					    job_res_desc.limit);
742 		}
743 	}
744 
745 	GNIX_WARN(FI_LOG_FABRIC, "%s", buf);
746 }
747 
_gnix_get_num_corespec_cpus(uint32_t * num_core_spec_cpus)748 int _gnix_get_num_corespec_cpus(uint32_t *num_core_spec_cpus)
749 {
750 	int ret = -FI_ENODATA;
751 	int ncpus = 0;
752 	FILE *fd = NULL;
753 	char buffer[4096], *line, *field;
754 	static bool already_called;
755 	static uint32_t cached_num_corespec_cpus;
756 
757 	if (num_core_spec_cpus == NULL)
758 		return -FI_EINVAL;
759 
760 	if (already_called == true) {
761 		*num_core_spec_cpus = cached_num_corespec_cpus;
762 		return FI_SUCCESS;
763 	}
764 
765 	fd = fopen("/proc/job", "r");
766 	if (!fd) {
767 		GNIX_WARN(FI_LOG_FABRIC,
768 			  "open of /proc/job returned %s", strerror(errno));
769 		return -errno;
770 	}
771 
772 	while (1) {
773 		line = fgets(buffer, sizeof(buffer), fd);
774 		if (!line)
775 			break;
776 
777 		line = strstr(line, "corespec");
778 		if (line != NULL) {
779 			field  = strtok(line, " ");
780 			field  = strtok(NULL, " ");
781 			if (!strcmp(field, "num_sys_cpus")) {
782 				field = strtok(NULL, " ");
783 				ncpus = atoi(field);
784 			}
785 			ret = FI_SUCCESS;
786 			break;
787 		}
788 	}
789 
790 	*num_core_spec_cpus = ncpus;
791 	cached_num_corespec_cpus = ncpus;
792 
793 	already_called = true;
794 
795 	fclose(fd);
796 
797 	return ret;
798 }
799 
800