1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  */
15 
16 /*
 * To perform a merge of two CTF containers, we first diff the two containers'
 * types. For every type that's in the src container, but not in the dst
 * container, we note it and add it to the dst container. If there are any
 * objects or functions associated with src, we go through and update the types
 * that they refer to such that they all refer to types in the dst container.
22  *
23  * The bulk of the logic for the merge, after we've run the diff, occurs in
24  * ctf_merge_common().
25  *
 * In terms of exported APIs, we don't really export a simple "merge two
 * containers" operation, as the general way this is used, in something like
 * ctfmerge(1), is to add all the containers and then let us figure out the
 * best way to merge them.
30  */
31 
32 #include <libctf_impl.h>
33 #include <sys/debug.h>
34 #include <sys/list.h>
35 #include <stddef.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <mergeq.h>
40 #include <errno.h>
41 
/*
 * Per-type merge state. The merge keeps an array of these that is indexed by
 * type ID in the source container (see ctf_merge_types_init()).
 */
typedef struct ctf_merge_tinfo {
	uint16_t cmt_map;	/* Mapped type ID; 0 means not mapped yet */
	boolean_t cmt_fixup;	/* struct/union whose members are added later */
	boolean_t cmt_forward;	/* out has only a forward for this src type */
	boolean_t cmt_missing;	/* diff found no match; type must be added */
} ctf_merge_tinfo_t;
48 
/*
 * State required for doing an individual merge of two containers.
 *
 * cm_tmap is allocated by ctf_merge_types_init() with one entry for every type
 * ID in cm_src (plus slot 0, which the passes over the table never visit) and
 * is released by ctf_merge_types_fini().
 */
typedef struct ctf_merge_types {
	ctf_file_t *cm_out;		/* Output CTF file */
	ctf_file_t *cm_src;		/* Input CTF file */
	ctf_merge_tinfo_t *cm_tmap;	/* Type state information */
	boolean_t cm_dedup;		/* Are we doing a dedup? */
	boolean_t cm_unique;		/* are we doing a uniquify? */
} ctf_merge_types_t;
59 
/*
 * One data object symbol from an input together with the type it refers to.
 * Entries live on a ctf_merge_input_t's cmi_omap list. cmo_tid is rewritten by
 * ctf_merge_fixup_symmaps() each time the owning input's types are merged.
 */
typedef struct ctf_merge_objmap {
	list_node_t cmo_node;		/* Linkage on cmi_omap */
	const char *cmo_name;		/* Symbol name */
	const char *cmo_file;		/* Symbol file; NULL unless STB_LOCAL */
	ulong_t cmo_idx;		/* Symbol ID */
	Elf64_Sym cmo_sym;		/* Symbol Entry */
	ctf_id_t cmo_tid;		/* Type ID */
} ctf_merge_objmap_t;
68 
/*
 * One function symbol from an input together with its return and argument
 * types. Entries live on a ctf_merge_input_t's cmi_fmap list. The structure is
 * allocated with room for cmf_argc trailing argument type IDs, so its size is
 * sizeof (ctf_merge_funcmap_t) + sizeof (ctf_id_t) * cmf_argc.
 */
typedef struct ctf_merge_funcmap {
	list_node_t cmf_node;		/* Linkage on cmi_fmap */
	const char *cmf_name;		/* Symbol name */
	const char *cmf_file;		/* Symbol file; NULL unless STB_LOCAL */
	ulong_t cmf_idx;		/* Symbol ID */
	Elf64_Sym cmf_sym;		/* Symbol Entry */
	ctf_id_t cmf_rtid;		/* Type ID */
	uint_t cmf_flags;		/* ctf_funcinfo_t ctc_flags */
	uint_t cmf_argc;		/* Number of arguments */
	ctf_id_t cmf_args[];		/* Types of arguments */
} ctf_merge_funcmap_t;
80 
/*
 * A single container taking part in a merge, along with the object and
 * function symbol maps that currently belong to it.
 */
typedef struct ctf_merge_input {
	list_node_t cmi_node;		/* Linkage on cmh_inputs */
	ctf_file_t *cmi_input;		/* The CTF container itself */
	list_t cmi_omap;		/* List of ctf_merge_objmap_t */
	list_t cmi_fmap;		/* List of ctf_merge_funcmap_t */
	boolean_t cmi_created;		/* If set, cmi_input is closed at fini */
} ctf_merge_input_t;
88 
/*
 * The merge handle. It is created by ctf_merge_init() and torn down by
 * ctf_merge_fini(), which also frees cmh_label, cmh_pname and every input
 * still on cmh_inputs.
 */
struct ctf_merge_handle {
	list_t cmh_inputs;		/* Input list */
	uint_t cmh_ninputs;		/* Number of inputs */
	uint_t cmh_nthreads;		/* Number of threads to use */
	ctf_file_t *cmh_unique;		/* ctf to uniquify against */
	boolean_t cmh_msyms;		/* Should we merge symbols/funcs? */
	int cmh_ofd;			/* FD for output file */
	int cmh_flags;			/* Flags that control merge behavior */
	char *cmh_label;		/* Optional label */
	char *cmh_pname;		/* Parent name */
};
100 
/*
 * Bundle of state for symbol processing.
 * NOTE(review): nothing in the visible part of this file uses this type; it is
 * presumably handed to a symbol iteration callback later on -- confirm against
 * the consumer before relying on the field descriptions below.
 */
typedef struct ctf_merge_symbol_arg {
	list_t *cmsa_objmap;		/* presumably ctf_merge_objmap_t list */
	list_t *cmsa_funcmap;		/* presumably ctf_merge_funcmap_t list */
	ctf_file_t *cmsa_out;		/* presumably the output container */
	boolean_t cmsa_dedup;		/* presumably set for a dedup run */
} ctf_merge_symbol_arg_t;
107 
/*
 * Declared ahead of its definition because the per-kind ctf_merge_add_*()
 * helpers call back into it to add the types they reference.
 */
static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
109 
110 static ctf_id_t
111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
112 {
113 	if (cmp->cm_dedup == B_FALSE) {
114 		VERIFY(cmp->cm_tmap[id].cmt_map != 0);
115 		return (cmp->cm_tmap[id].cmt_map);
116 	}
117 
118 	while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
119 		VERIFY(cmp->cm_tmap[id].cmt_map != 0);
120 		id = cmp->cm_tmap[id].cmt_map;
121 	}
122 	VERIFY(cmp->cm_tmap[id].cmt_map != 0);
123 	return (cmp->cm_tmap[id].cmt_map);
124 }
125 
126 static void
127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
128     ctf_id_t oid, void *arg)
129 {
130 	ctf_merge_types_t *cmp = arg;
131 	ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
132 	uint_t kind;
133 
134 	if (same == B_TRUE) {
135 		if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
136 		    (kind = ctf_type_kind(ofp, oid)) != CTF_K_FORWARD) {
137 			VERIFY(cmt[oid].cmt_map == 0);
138 
139 			/*
140 			 * If we're uniquifying types, it's possible for the
141 			 * container that we're uniquifying against to have a
142 			 * forward which exists in the container being reduced.
143 			 * For example, genunix has the machcpu structure as a
144 			 * forward which is actually in unix and we uniquify
145 			 * unix against genunix. In such cases, we explicitly do
146 			 * not do any mapping of the forward information, lest
147 			 * we risk losing the real definition. Instead, mark
148 			 * that it's missing.
149 			 */
150 			if (cmp->cm_unique == B_TRUE) {
151 				cmt[oid].cmt_missing = B_TRUE;
152 				return;
153 			}
154 
155 			cmt[oid].cmt_map = iid;
156 			cmt[oid].cmt_forward = B_TRUE;
157 			ctf_dprintf("merge diff forward mapped %ld->%ld (%u)\n",
158 			    oid, iid, kind);
159 			return;
160 		}
161 
162 		/*
163 		 * We could have multiple things that a given type ends up
164 		 * matching in the world of forwards and pointers to forwards.
165 		 * For now just take the first one...
166 		 */
167 		if (cmt[oid].cmt_map != 0)
168 			return;
169 		cmt[oid].cmt_map = iid;
170 		ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
171 	} else if (ifp == cmp->cm_src) {
172 		VERIFY(cmt[iid].cmt_map == 0);
173 		cmt[iid].cmt_missing = B_TRUE;
174 		ctf_dprintf("merge diff said %d is missing\n", iid);
175 	}
176 }
177 
178 static int
179 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
180 {
181 	int ret, flags;
182 	const ctf_type_t *tp;
183 	const char *name;
184 	ctf_encoding_t en;
185 
186 	if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
187 		return (CTF_ERR);
188 
189 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
190 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
191 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
192 		flags = CTF_ADD_ROOT;
193 	else
194 		flags = CTF_ADD_NONROOT;
195 
196 	ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
197 	    ctf_type_kind(cmp->cm_src, id));
198 
199 	if (ret == CTF_ERR)
200 		return (ret);
201 
202 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
203 	cmp->cm_tmap[id].cmt_map = ret;
204 	return (0);
205 }
206 
207 static int
208 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
209 {
210 	int ret, flags;
211 	const ctf_type_t *tp;
212 	ctf_arinfo_t ar;
213 
214 	if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
215 		return (CTF_ERR);
216 
217 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
218 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
219 		flags = CTF_ADD_ROOT;
220 	else
221 		flags = CTF_ADD_NONROOT;
222 
223 	if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
224 		ret = ctf_merge_add_type(cmp, ar.ctr_contents);
225 		if (ret != 0)
226 			return (ret);
227 		ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
228 	}
229 	ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
230 
231 	if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
232 		ret = ctf_merge_add_type(cmp, ar.ctr_index);
233 		if (ret != 0)
234 			return (ret);
235 		ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
236 	}
237 	ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
238 
239 	ret = ctf_add_array(cmp->cm_out, flags, &ar);
240 	if (ret == CTF_ERR)
241 		return (ret);
242 
243 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
244 	cmp->cm_tmap[id].cmt_map = ret;
245 
246 	return (0);
247 }
248 
249 static int
250 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
251 {
252 	int ret, flags;
253 	const ctf_type_t *tp;
254 	ctf_id_t reftype;
255 	const char *name;
256 
257 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
258 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
259 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
260 		flags = CTF_ADD_ROOT;
261 	else
262 		flags = CTF_ADD_NONROOT;
263 
264 	reftype = ctf_type_reference(cmp->cm_src, id);
265 	if (reftype == CTF_ERR)
266 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
267 
268 	if (cmp->cm_tmap[reftype].cmt_map == 0) {
269 		ret = ctf_merge_add_type(cmp, reftype);
270 		if (ret != 0)
271 			return (ret);
272 		ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
273 	}
274 	reftype = ctf_merge_gettype(cmp, reftype);
275 
276 	ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
277 	    ctf_type_kind(cmp->cm_src, id));
278 	if (ret == CTF_ERR)
279 		return (ret);
280 
281 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
282 	cmp->cm_tmap[id].cmt_map = ret;
283 	return (0);
284 }
285 
286 static int
287 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
288 {
289 	int ret, flags;
290 	const ctf_type_t *tp;
291 	const char *name;
292 	ctf_id_t reftype;
293 
294 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
295 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
296 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
297 		flags = CTF_ADD_ROOT;
298 	else
299 		flags = CTF_ADD_NONROOT;
300 
301 	reftype = ctf_type_reference(cmp->cm_src, id);
302 	if (reftype == CTF_ERR)
303 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
304 
305 	if (cmp->cm_tmap[reftype].cmt_map == 0) {
306 		ret = ctf_merge_add_type(cmp, reftype);
307 		if (ret != 0)
308 			return (ret);
309 		ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
310 	}
311 	reftype = ctf_merge_gettype(cmp, reftype);
312 
313 	ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
314 	if (ret == CTF_ERR)
315 		return (ret);
316 
317 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
318 	cmp->cm_tmap[id].cmt_map = ret;
319 	return (0);
320 }
321 
/*
 * Argument handed to ctf_merge_add_enumerator() while iterating over the
 * enumerators of a source enum.
 */
typedef struct ctf_merge_enum {
	ctf_file_t *cme_fp;	/* Container receiving the enumerators */
	ctf_id_t cme_id;	/* ID of the enum in cme_fp being filled in */
} ctf_merge_enum_t;
326 
327 static int
328 ctf_merge_add_enumerator(const char *name, int value, void *arg)
329 {
330 	ctf_merge_enum_t *cmep = arg;
331 
332 	return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
333 	    CTF_ERR);
334 }
335 
336 static int
337 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
338 {
339 	int flags;
340 	const ctf_type_t *tp;
341 	const char *name;
342 	ctf_id_t enumid;
343 	ctf_merge_enum_t cme;
344 	size_t size;
345 
346 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
347 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
348 		flags = CTF_ADD_ROOT;
349 	else
350 		flags = CTF_ADD_NONROOT;
351 
352 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
353 	size = ctf_get_ctt_size(cmp->cm_src, tp, NULL, NULL);
354 
355 	enumid = ctf_add_enum(cmp->cm_out, flags, name, size);
356 	if (enumid == CTF_ERR)
357 		return (enumid);
358 
359 	cme.cme_fp = cmp->cm_out;
360 	cme.cme_id = enumid;
361 	if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
362 	    &cme) != 0)
363 		return (CTF_ERR);
364 
365 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
366 	cmp->cm_tmap[id].cmt_map = enumid;
367 	return (0);
368 }
369 
370 static int
371 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
372 {
373 	int ret, flags, i;
374 	const ctf_type_t *tp;
375 	ctf_funcinfo_t ctc;
376 	ctf_id_t *argv;
377 
378 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
379 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
380 		flags = CTF_ADD_ROOT;
381 	else
382 		flags = CTF_ADD_NONROOT;
383 
384 	if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
385 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
386 
387 	argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
388 	if (argv == NULL)
389 		return (ctf_set_errno(cmp->cm_out, ENOMEM));
390 	if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
391 	    CTF_ERR) {
392 		ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
393 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
394 	}
395 
396 	if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
397 		ret = ctf_merge_add_type(cmp, ctc.ctc_return);
398 		if (ret != 0)
399 			return (ret);
400 		ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
401 	}
402 	ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
403 
404 	for (i = 0; i < ctc.ctc_argc; i++) {
405 		if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
406 			ret = ctf_merge_add_type(cmp, argv[i]);
407 			if (ret != 0)
408 				return (ret);
409 			ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
410 		}
411 		argv[i] = ctf_merge_gettype(cmp, argv[i]);
412 	}
413 
414 	ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
415 	ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
416 	if (ret == CTF_ERR)
417 		return (ret);
418 
419 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
420 	cmp->cm_tmap[id].cmt_map = ret;
421 	return (0);
422 }
423 
424 static int
425 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id, uint_t kind)
426 {
427 	int ret, flags;
428 	const ctf_type_t *tp;
429 	const char *name;
430 
431 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
432 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
433 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
434 		flags = CTF_ADD_ROOT;
435 	else
436 		flags = CTF_ADD_NONROOT;
437 
438 	ret = ctf_add_forward(cmp->cm_out, flags, name, kind);
439 	if (ret == CTF_ERR)
440 		return (CTF_ERR);
441 
442 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
443 	cmp->cm_tmap[id].cmt_map = ret;
444 	return (0);
445 }
446 
/*
 * Argument handed to ctf_merge_add_member() while iterating over the members
 * of a source struct or union.
 */
typedef struct ctf_merge_su {
	ctf_merge_types_t *cms_cm;	/* Merge state, for the type map */
	ctf_id_t cms_id;		/* Struct/union in cm_out being filled */
} ctf_merge_su_t;
451 
452 static int
453 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
454 {
455 	ctf_merge_su_t *cms = arg;
456 
457 	VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
458 	type = cms->cms_cm->cm_tmap[type].cmt_map;
459 
460 	ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
461 	return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
462 	    type, offset) == CTF_ERR);
463 }
464 
465 /*
466  * During the first pass, we always add the generic structure and union but none
467  * of its members as they might not all have been mapped yet. Instead we just
468  * mark all structures and unions as needing to be fixed up.
469  */
470 static int
471 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
472 {
473 	int flags, kind;
474 	const ctf_type_t *tp;
475 	const char *name;
476 	ctf_id_t suid;
477 
478 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
479 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
480 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
481 		flags = CTF_ADD_ROOT;
482 	else
483 		flags = CTF_ADD_NONROOT;
484 	kind = ctf_type_kind(cmp->cm_src, id);
485 
486 	if (kind == CTF_K_STRUCT)
487 		suid = ctf_add_struct(cmp->cm_out, flags, name);
488 	else
489 		suid = ctf_add_union(cmp->cm_out, flags, name);
490 
491 	ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id, suid);
492 
493 	if (suid == CTF_ERR)
494 		return (suid);
495 
496 	if (forward == B_FALSE) {
497 		VERIFY(cmp->cm_tmap[id].cmt_map == 0);
498 		cmp->cm_tmap[id].cmt_map = suid;
499 	} else {
500 		/*
501 		 * If this is a forward reference then its mapping should
502 		 * already exist.
503 		 */
504 		if (cmp->cm_tmap[id].cmt_map != suid) {
505 			ctf_dprintf(
506 			    "mismatch sou \"%s\" as (%d) %d->%d (exp %d)\n",
507 			    name, kind, id, suid, cmp->cm_tmap[id].cmt_map);
508 			ctf_hash_dump("src structs",
509 			    &cmp->cm_src->ctf_structs, cmp->cm_src);
510 			ctf_hash_dump("src unions",
511 			    &cmp->cm_src->ctf_unions, cmp->cm_src);
512 			ctf_hash_dump("out structs",
513 			    &cmp->cm_out->ctf_structs, cmp->cm_out);
514 			ctf_hash_dump("out unions",
515 			    &cmp->cm_out->ctf_unions, cmp->cm_out);
516 		}
517 		VERIFY(cmp->cm_tmap[id].cmt_map == suid);
518 	}
519 	cmp->cm_tmap[id].cmt_fixup = B_TRUE;
520 
521 	return (0);
522 }
523 
524 static int
525 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
526 {
527 	int kind, ret;
528 
529 	/*
530 	 * We may end up evaluating a type more than once as we may deal with it
531 	 * as we recursively evaluate some kind of reference and then we may see
532 	 * it normally.
533 	 */
534 	if (cmp->cm_tmap[id].cmt_map != 0)
535 		return (0);
536 
537 	kind = ctf_type_kind(cmp->cm_src, id);
538 	switch (kind) {
539 	case CTF_K_INTEGER:
540 	case CTF_K_FLOAT:
541 		ret = ctf_merge_add_number(cmp, id);
542 		break;
543 	case CTF_K_ARRAY:
544 		ret = ctf_merge_add_array(cmp, id);
545 		break;
546 	case CTF_K_POINTER:
547 	case CTF_K_VOLATILE:
548 	case CTF_K_CONST:
549 	case CTF_K_RESTRICT:
550 		ret = ctf_merge_add_reftype(cmp, id);
551 		break;
552 	case CTF_K_TYPEDEF:
553 		ret = ctf_merge_add_typedef(cmp, id);
554 		break;
555 	case CTF_K_ENUM:
556 		ret = ctf_merge_add_enum(cmp, id);
557 		break;
558 	case CTF_K_FUNCTION:
559 		ret = ctf_merge_add_func(cmp, id);
560 		break;
561 	case CTF_K_FORWARD: {
562 		const ctf_type_t *tp;
563 		uint_t kind;
564 
565 		tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
566 
567 		/*
568 		 * For forward declarations, ctt_type is the CTF_K_*
569 		 * kind for the tag. Older versions of the CTF tools may
570 		 * not have filled this in so if ctt_type is unknown or
571 		 * invalid, treat it as a struct. This mirrors the logic in
572 		 * ctf_bufopen().
573 		 */
574 
575 		kind = tp->ctt_type;
576 		if (kind == CTF_K_UNKNOWN || kind >= CTF_K_MAX)
577 			kind = CTF_K_STRUCT;
578 
579 		ret = ctf_merge_add_forward(cmp, id, kind);
580 		break;
581 	}
582 	case CTF_K_STRUCT:
583 	case CTF_K_UNION:
584 		ret = ctf_merge_add_sou(cmp, id, B_FALSE);
585 		break;
586 	case CTF_K_UNKNOWN:
587 		/*
588 		 * We don't add unknown types, and we later assert that nothing
589 		 * should reference them.
590 		 */
591 		return (0);
592 	default:
593 		abort();
594 	}
595 
596 	return (ret);
597 }
598 
599 static int
600 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
601 {
602 	ctf_dtdef_t *dtd;
603 	ctf_merge_su_t cms;
604 	ctf_id_t mapid;
605 	ssize_t size;
606 
607 	mapid = cmp->cm_tmap[id].cmt_map;
608 	VERIFY(mapid != 0);
609 	dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
610 	VERIFY(dtd != NULL);
611 
612 	ctf_dprintf("Trying to fix up sou %d\n", id);
613 	cms.cms_cm = cmp;
614 	cms.cms_id = mapid;
615 	if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
616 		return (CTF_ERR);
617 
618 	if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
619 		return (CTF_ERR);
620 	if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
621 		return (CTF_ERR);
622 
623 	return (0);
624 }
625 
626 static int
627 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
628 {
629 	int kind, ret;
630 
631 	kind = ctf_type_kind(cmp->cm_src, id);
632 	switch (kind) {
633 	case CTF_K_STRUCT:
634 	case CTF_K_UNION:
635 		ret = ctf_merge_fixup_sou(cmp, id);
636 		break;
637 	default:
638 		VERIFY(0);
639 		ret = CTF_ERR;
640 	}
641 
642 	return (ret);
643 }
644 
645 /*
646  * Now that we've successfully merged everything, we're going to remap the type
647  * table.
648  *
649  * Remember we have two containers: ->cm_src is what we're working from, and
650  * ->cm_out is where we are building the de-duplicated CTF.
651  *
652  * The index of this table is always the type IDs in ->cm_src.
653  *
654  * When we built this table originally in ctf_diff_self(), if we found a novel
655  * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
656  * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
657  *
658  * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
659  * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
660  * to be the *new* type ID in ->cm_out.  In this function, you can read
661  * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
662  *
663  * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
664  * making sure *their* .cmt_map values also point to the ->cm_out container.
665  */
666 static void
667 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
668 {
669 	int i;
670 
671 	for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
672 		ctf_id_t tid;
673 
674 		if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
675 			VERIFY(cmp->cm_tmap[i].cmt_map != 0);
676 			continue;
677 		}
678 
679 		tid = i;
680 		while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
681 			VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
682 			tid = cmp->cm_tmap[tid].cmt_map;
683 		}
684 		VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
685 		cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
686 	}
687 }
688 
689 
690 /*
691  * We're going to do three passes over the containers.
692  *
693  * Pass 1 checks for forward references in the output container that we know
694  * exist in the source container.
695  *
696  * Pass 2 adds all the missing types from the source container. As part of this
697  * we may be adding a type as a forward reference that doesn't exist yet.
698  * Any types that we encounter in this form, we need to add to a third pass.
699  *
700  * Pass 3 is the fixup pass. Here we go through and find all the types that were
701  * missing in the first.
702  *
703  * Importantly, we *must* call ctf_update between the second and third pass,
704  * otherwise several of the libctf functions will not properly find the data in
705  * the container. If we're doing a dedup we also fix up the type mapping.
706  */
707 static int
708 ctf_merge_common(ctf_merge_types_t *cmp)
709 {
710 	int ret, i;
711 
712 	ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
713 	ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
714 
715 	/* Pass 1 */
716 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
717 		if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
718 			ctf_dprintf("Forward %d\n", i);
719 			ret = ctf_merge_add_sou(cmp, i, B_TRUE);
720 			if (ret != 0) {
721 				return (ret);
722 			}
723 		}
724 	}
725 
726 	/* Pass 2 */
727 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
728 		if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
729 			ret = ctf_merge_add_type(cmp, i);
730 			if (ret != 0) {
731 				ctf_dprintf("Failed to merge type %d\n", i);
732 				return (ret);
733 			}
734 		}
735 	}
736 
737 	ret = ctf_update(cmp->cm_out);
738 	if (ret != 0)
739 		return (ret);
740 
741 	if (cmp->cm_dedup == B_TRUE) {
742 		ctf_merge_dedup_remap(cmp);
743 	}
744 
745 	ctf_dprintf("Beginning merge pass 3\n");
746 	/* Pass 3 */
747 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
748 		if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
749 			ret = ctf_merge_fixup_type(cmp, i);
750 			if (ret != 0)
751 				return (ret);
752 		}
753 	}
754 
755 	return (0);
756 }
757 
758 /*
759  * Uniquification is slightly different from a stock merge. For starters, we
760  * don't need to replace any forward references in the output. In this case
761  * though, the types that already exist are in a parent container to the empty
762  * output container.
763  */
764 static int
765 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
766 {
767 	int i, ret;
768 
769 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
770 		if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
771 			continue;
772 		ret = ctf_merge_add_type(cmp, i);
773 		if (ret != 0)
774 			return (ret);
775 	}
776 
777 	ret = ctf_update(cmp->cm_out);
778 	if (ret != 0)
779 		return (ret);
780 
781 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
782 		if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
783 			continue;
784 		ret = ctf_merge_fixup_type(cmp, i);
785 		if (ret != 0)
786 			return (ret);
787 	}
788 
789 	return (0);
790 }
791 
792 static int
793 ctf_merge_types_init(ctf_merge_types_t *cmp)
794 {
795 	cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
796 	    (cmp->cm_src->ctf_typemax + 1));
797 	if (cmp->cm_tmap == NULL)
798 		return (ctf_set_errno(cmp->cm_out, ENOMEM));
799 	bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
800 	    (cmp->cm_src->ctf_typemax + 1));
801 	return (0);
802 }
803 
804 static void
805 ctf_merge_types_fini(ctf_merge_types_t *cmp)
806 {
807 	ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
808 	    (cmp->cm_src->ctf_typemax + 1));
809 }
810 
811 /*
812  * After performing a pass, we need to go through the object and function type
813  * maps and potentially fix them up based on the new maps that we have.
814  */
815 static void
816 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
817 {
818 	ctf_merge_objmap_t *cmo;
819 	ctf_merge_funcmap_t *cmf;
820 
821 	for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
822 	    cmo = list_next(&cmi->cmi_omap, cmo)) {
823 		VERIFY3S(cmo->cmo_tid, !=, 0);
824 		VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
825 		cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
826 	}
827 
828 	for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
829 	    cmf = list_next(&cmi->cmi_fmap, cmf)) {
830 		int i;
831 
832 		VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
833 		cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
834 		for (i = 0; i < cmf->cmf_argc; i++) {
835 			VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
836 			cmf->cmf_args[i] =
837 			    cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
838 		}
839 	}
840 }
841 
842 /*
843  * Merge the types contained inside of two input files. The second input file is
844  * always going to be the destination. We're guaranteed that it's always
845  * writeable.
846  */
847 static int
848 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
849 {
850 	int ret;
851 	ctf_merge_types_t cm;
852 	ctf_diff_t *cdp;
853 	ctf_merge_input_t *scmi = arg;
854 	ctf_merge_input_t *dcmi = arg2;
855 	ctf_file_t *out = dcmi->cmi_input;
856 	ctf_file_t *source = scmi->cmi_input;
857 
858 	ctf_dprintf("merging %p->%p\n", source, out);
859 
860 	if (!(out->ctf_flags & LCTF_RDWR))
861 		return (ctf_set_errno(out, ECTF_RDONLY));
862 
863 	if (ctf_getmodel(out) != ctf_getmodel(source))
864 		return (ctf_set_errno(out, ECTF_DMODEL));
865 
866 	if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
867 		return (ret);
868 
869 	cm.cm_out = out;
870 	cm.cm_src = source;
871 	cm.cm_dedup = B_FALSE;
872 	cm.cm_unique = B_FALSE;
873 	ret = ctf_merge_types_init(&cm);
874 	if (ret != 0) {
875 		ctf_diff_fini(cdp);
876 		return (ctf_set_errno(out, ret));
877 	}
878 
879 	ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
880 	if (ret != 0)
881 		goto cleanup;
882 	ret = ctf_merge_common(&cm);
883 	ctf_dprintf("merge common returned with %d\n", ret);
884 	if (ret == 0) {
885 		ret = ctf_update(out);
886 		ctf_dprintf("update returned with %d\n", ret);
887 	} else {
888 		goto cleanup;
889 	}
890 
891 	/*
892 	 * Now we need to fix up the object and function maps.
893 	 */
894 	ctf_merge_fixup_symmaps(&cm, scmi);
895 
896 	/*
897 	 * Now that we've fixed things up, we need to give our function and
898 	 * object maps to the destination, such that it can continue to update
899 	 * them going forward.
900 	 */
901 	list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
902 	list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
903 
904 cleanup:
905 	if (ret == 0)
906 		*outp = dcmi;
907 	ctf_merge_types_fini(&cm);
908 	ctf_diff_fini(cdp);
909 	if (ret != 0)
910 		return (ctf_errno(out));
911 	ctf_phase_bump();
912 	return (0);
913 }
914 
915 static int
916 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
917 {
918 	int err, ret;
919 	ctf_file_t *out;
920 	ctf_merge_types_t cm;
921 	ctf_diff_t *cdp;
922 	ctf_merge_input_t *cmi;
923 	ctf_file_t *parent = cmh->cmh_unique;
924 
925 	*outp = NULL;
926 	out = ctf_fdcreate(cmh->cmh_ofd, &err);
927 	if (out == NULL)
928 		return (ctf_set_errno(src, err));
929 
930 	out->ctf_parname = cmh->cmh_pname;
931 	if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
932 		(void) ctf_set_errno(src, ctf_errno(out));
933 		ctf_close(out);
934 		return (CTF_ERR);
935 	}
936 
937 	if (ctf_import(out, parent) != 0) {
938 		(void) ctf_set_errno(src, ctf_errno(out));
939 		ctf_close(out);
940 		return (CTF_ERR);
941 	}
942 
943 	if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
944 		ctf_close(out);
945 		return (ctf_set_errno(src, ctf_errno(parent)));
946 	}
947 
948 	cm.cm_out = parent;
949 	cm.cm_src = src;
950 	cm.cm_dedup = B_FALSE;
951 	cm.cm_unique = B_TRUE;
952 	ret = ctf_merge_types_init(&cm);
953 	if (ret != 0) {
954 		ctf_close(out);
955 		ctf_diff_fini(cdp);
956 		return (ctf_set_errno(src, ret));
957 	}
958 
959 	ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
960 	if (ret == 0) {
961 		cm.cm_out = out;
962 		ret = ctf_merge_uniquify_types(&cm);
963 		if (ret == 0)
964 			ret = ctf_update(out);
965 	}
966 
967 	if (ret != 0) {
968 		ctf_merge_types_fini(&cm);
969 		ctf_diff_fini(cdp);
970 		return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
971 	}
972 
973 	for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
974 	    cmi = list_next(&cmh->cmh_inputs, cmi)) {
975 		ctf_merge_fixup_symmaps(&cm, cmi);
976 	}
977 
978 	ctf_merge_types_fini(&cm);
979 	ctf_diff_fini(cdp);
980 	*outp = out;
981 	return (0);
982 }
983 
984 static void
985 ctf_merge_fini_input(ctf_merge_input_t *cmi)
986 {
987 	ctf_merge_objmap_t *cmo;
988 	ctf_merge_funcmap_t *cmf;
989 
990 	while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
991 		ctf_free(cmo, sizeof (ctf_merge_objmap_t));
992 
993 	while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
994 		ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
995 		    sizeof (ctf_id_t) * cmf->cmf_argc);
996 
997 	if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
998 		ctf_close(cmi->cmi_input);
999 
1000 	ctf_free(cmi, sizeof (ctf_merge_input_t));
1001 }
1002 
1003 void
1004 ctf_merge_fini(ctf_merge_t *cmh)
1005 {
1006 	size_t len;
1007 	ctf_merge_input_t *cmi;
1008 
1009 	if (cmh->cmh_label != NULL) {
1010 		len = strlen(cmh->cmh_label) + 1;
1011 		ctf_free(cmh->cmh_label, len);
1012 	}
1013 
1014 	if (cmh->cmh_pname != NULL) {
1015 		len = strlen(cmh->cmh_pname) + 1;
1016 		ctf_free(cmh->cmh_pname, len);
1017 	}
1018 
1019 	while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
1020 		ctf_merge_fini_input(cmi);
1021 
1022 	ctf_free(cmh, sizeof (ctf_merge_t));
1023 }
1024 
1025 ctf_merge_t *
1026 ctf_merge_init(int fd, int *errp)
1027 {
1028 	int err;
1029 	ctf_merge_t *out;
1030 	struct stat st;
1031 
1032 	if (errp == NULL)
1033 		errp = &err;
1034 
1035 	if (fd != -1 && fstat(fd, &st) != 0) {
1036 		*errp = EINVAL;
1037 		return (NULL);
1038 	}
1039 
1040 	out = ctf_alloc(sizeof (ctf_merge_t));
1041 	if (out == NULL) {
1042 		*errp = ENOMEM;
1043 		return (NULL);
1044 	}
1045 
1046 	if (fd == -1) {
1047 		out->cmh_msyms = B_FALSE;
1048 	} else {
1049 		out->cmh_msyms = B_TRUE;
1050 	}
1051 
1052 	list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1053 	    offsetof(ctf_merge_input_t, cmi_node));
1054 	out->cmh_ninputs = 0;
1055 	out->cmh_nthreads = 1;
1056 	out->cmh_unique = NULL;
1057 	out->cmh_ofd = fd;
1058 	out->cmh_flags = 0;
1059 	out->cmh_label = NULL;
1060 	out->cmh_pname = NULL;
1061 
1062 	return (out);
1063 }
1064 
1065 int
1066 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1067 {
1068 	char *dup;
1069 
1070 	if (label == NULL)
1071 		return (EINVAL);
1072 
1073 	dup = ctf_strdup(label);
1074 	if (dup == NULL)
1075 		return (EAGAIN);
1076 
1077 	if (cmh->cmh_label != NULL) {
1078 		size_t len = strlen(cmh->cmh_label) + 1;
1079 		ctf_free(cmh->cmh_label, len);
1080 	}
1081 
1082 	cmh->cmh_label = dup;
1083 	return (0);
1084 }
1085 
1086 static int
1087 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1088     const char *file, const char *name, const Elf64_Sym *symp)
1089 {
1090 	ctf_merge_funcmap_t *fmap;
1091 
1092 	fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1093 	    sizeof (ctf_id_t) * fip->ctc_argc);
1094 	if (fmap == NULL)
1095 		return (ENOMEM);
1096 
1097 	fmap->cmf_idx = idx;
1098 	fmap->cmf_sym = *symp;
1099 	fmap->cmf_rtid = fip->ctc_return;
1100 	fmap->cmf_flags = fip->ctc_flags;
1101 	fmap->cmf_argc = fip->ctc_argc;
1102 	fmap->cmf_name = name;
1103 	if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1104 		fmap->cmf_file = file;
1105 	} else {
1106 		fmap->cmf_file = NULL;
1107 	}
1108 
1109 	if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1110 	    fmap->cmf_args) != 0) {
1111 		ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1112 		    sizeof (ctf_id_t) * fip->ctc_argc);
1113 		return (ctf_errno(cmi->cmi_input));
1114 	}
1115 
1116 	ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1117 	    fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1118 	    ELF64_ST_BIND(symp->st_info));
1119 	list_insert_tail(&cmi->cmi_fmap, fmap);
1120 	return (0);
1121 }
1122 
1123 static int
1124 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1125     const char *file, const char *name, const Elf64_Sym *symp)
1126 {
1127 	ctf_merge_objmap_t *cmo;
1128 
1129 	cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1130 	if (cmo == NULL)
1131 		return (ENOMEM);
1132 
1133 	cmo->cmo_name = name;
1134 	if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1135 		cmo->cmo_file = file;
1136 	} else {
1137 		cmo->cmo_file = NULL;
1138 	}
1139 	cmo->cmo_idx = idx;
1140 	cmo->cmo_tid = id;
1141 	cmo->cmo_sym = *symp;
1142 	list_insert_tail(&cmi->cmi_omap, cmo);
1143 
1144 	ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1145 	    cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1146 
1147 	return (0);
1148 }
1149 
1150 static int
1151 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1152     const char *name, boolean_t primary, void *arg)
1153 {
1154 	ctf_merge_input_t *cmi = arg;
1155 	ctf_file_t *fp = cmi->cmi_input;
1156 	ushort_t *data, funcbase;
1157 	uint_t type;
1158 	ctf_funcinfo_t fi;
1159 
1160 	/*
1161 	 * See if there is type information for this. If there is no
1162 	 * type information for this entry or no translation, then we
1163 	 * will find the value zero. This indicates no type ID for
1164 	 * objects and encodes unknown information for functions.
1165 	 */
1166 	if (fp->ctf_sxlate[idx] == -1u)
1167 		return (0);
1168 	data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1169 	if (*data == 0)
1170 		return (0);
1171 
1172 	type = ELF64_ST_TYPE(symp->st_info);
1173 
1174 	switch (type) {
1175 	case STT_FUNC:
1176 		funcbase = *data;
1177 		if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1178 			return (0);
1179 		data++;
1180 		fi.ctc_return = *data;
1181 		data++;
1182 		fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1183 		fi.ctc_flags = 0;
1184 
1185 		if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1186 			fi.ctc_flags |= CTF_FUNC_VARARG;
1187 			fi.ctc_argc--;
1188 		}
1189 		return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1190 		    symp));
1191 	case STT_OBJECT:
1192 		return (ctf_merge_add_object(cmi, *data, idx, file, name,
1193 		    symp));
1194 	default:
1195 		return (0);
1196 	}
1197 }
1198 
1199 /*
1200  * Whenever we create an entry to merge, we then go and add a second empty
1201  * ctf_file_t which we use for the purposes of our merging. It's not the best,
1202  * but it's the best that we've got at the moment.
1203  */
1204 int
1205 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1206 {
1207 	int ret;
1208 	ctf_merge_input_t *cmi;
1209 	ctf_file_t *empty;
1210 
1211 	ctf_dprintf("adding input %p\n", input);
1212 
1213 	if (input->ctf_flags & LCTF_CHILD)
1214 		return (ECTF_MCHILD);
1215 
1216 	cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1217 	if (cmi == NULL)
1218 		return (ENOMEM);
1219 
1220 	cmi->cmi_created = B_FALSE;
1221 	cmi->cmi_input = input;
1222 	list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1223 	    offsetof(ctf_merge_funcmap_t, cmf_node));
1224 	list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1225 	    offsetof(ctf_merge_objmap_t, cmo_node));
1226 
1227 	if (cmh->cmh_msyms == B_TRUE) {
1228 		if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1229 		    cmi)) != 0) {
1230 			ctf_merge_fini_input(cmi);
1231 			return (ret);
1232 		}
1233 	}
1234 
1235 	list_insert_tail(&cmh->cmh_inputs, cmi);
1236 	cmh->cmh_ninputs++;
1237 
1238 	/* And now the empty one to merge into this */
1239 	cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1240 	if (cmi == NULL)
1241 		return (ENOMEM);
1242 	list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1243 	    offsetof(ctf_merge_funcmap_t, cmf_node));
1244 	list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1245 	    offsetof(ctf_merge_objmap_t, cmo_node));
1246 
1247 	empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1248 	if (empty == NULL)
1249 		return (ret);
1250 	cmi->cmi_input = empty;
1251 	cmi->cmi_created = B_TRUE;
1252 
1253 	if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1254 		return (ctf_errno(empty));
1255 	}
1256 
1257 	list_insert_tail(&cmh->cmh_inputs, cmi);
1258 	cmh->cmh_ninputs++;
1259 	ctf_dprintf("added containers %p and %p\n", input, empty);
1260 	return (0);
1261 }
1262 
1263 int
1264 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1265 {
1266 	char *dup;
1267 
1268 	if (u->ctf_flags & LCTF_CHILD)
1269 		return (ECTF_MCHILD);
1270 	if (pname == NULL)
1271 		return (EINVAL);
1272 	dup = ctf_strdup(pname);
1273 	if (dup == NULL)
1274 		return (EINVAL);
1275 	if (cmh->cmh_pname != NULL) {
1276 		size_t len = strlen(cmh->cmh_pname) + 1;
1277 		ctf_free(cmh->cmh_pname, len);
1278 	}
1279 	cmh->cmh_pname = dup;
1280 	cmh->cmh_unique = u;
1281 	return (0);
1282 }
1283 
1284 /*
1285  * Symbol matching rules: the purpose of this is to verify that the type
1286  * information that we have for a given symbol actually matches the output
1287  * symbol. This is unfortunately complicated by several different factors:
1288  *
1289  * 1. When merging multiple .o's into a single item, the symbol table index will
1290  * not match.
1291  *
1292  * 2. Visibility of a symbol may not be identical to the object file or the
1293  * DWARF information due to symbol reduction via a mapfile.
1294  *
1295  * As such, we have to employ the following rules:
1296  *
1297  * 1. A global symbol table entry always matches a global CTF symbol with the
1298  * same name.
1299  *
1300  * 2. A local symbol table entry always matches a local CTF symbol if they have
1301  * the same name and they belong to the same file.
1302  *
1303  * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1304  * types match, the values match, the sizes match, and the section indexes
1305  * match. This happens when we do a conversion in one pass, it almost never
1306  * happens when we're merging multiple object files. If we match a CTF global
1307  * symbol, that's a fixed match, otherwise it's a fuzzy match.
1308  *
1309  * 4. A local symbol table entry matches a global CTF entry if the
1310  * other pieces fail, but they have the same name. This is considered a fuzzy
1311  * match and is not used unless we have no other options.
1312  *
1313  * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1314  * fail, but they have the same name. This is considered a fuzzy match and is
1315  * not used unless we have no other options. When merging independent .o files,
1316  * this is often the only recourse we have to matching weak symbols.
1317  *
1318  * In the end, this would all be much simpler if we were able to do this as part
1319  * of libld which would be able to do all the symbol transformations.
1320  */
1321 static boolean_t
1322 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1323     const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1324     const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1325 {
1326 	*is_fuzzy = B_FALSE;
1327 	uint_t symtab_bind, ctf_bind;
1328 
1329 	symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1330 	ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1331 
1332 	ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1333 	    symtab_file, symtab_name, symtab_bind,
1334 	    ctf_file, ctf_name, ctf_bind);
1335 	if (strcmp(ctf_name, symtab_name) != 0) {
1336 		return (B_FALSE);
1337 	}
1338 
1339 	if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1340 		return (B_TRUE);
1341 	} else if (symtab_bind == STB_GLOBAL) {
1342 		return (B_FALSE);
1343 	}
1344 
1345 	if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1346 	    ctf_file != NULL && symtab_file != NULL &&
1347 	    strcmp(ctf_file, symtab_file) == 0) {
1348 		return (B_TRUE);
1349 	}
1350 
1351 	if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1352 	    ELF64_ST_TYPE(symtab_symp->st_info) ==
1353 	    ELF64_ST_TYPE(ctf_symp->st_info) &&
1354 	    symtab_symp->st_value == ctf_symp->st_value &&
1355 	    symtab_symp->st_size == ctf_symp->st_size &&
1356 	    symtab_symp->st_shndx == ctf_symp->st_shndx) {
1357 		if (ctf_bind == STB_GLOBAL) {
1358 			return (B_TRUE);
1359 		}
1360 
1361 		if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1362 		    symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1363 			*is_fuzzy = B_TRUE;
1364 			return (B_TRUE);
1365 		}
1366 	}
1367 
1368 	if (ctf_bind == STB_GLOBAL ||
1369 	    (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1370 		*is_fuzzy = B_TRUE;
1371 		return (B_TRUE);
1372 	}
1373 
1374 	return (B_FALSE);
1375 }
1376 
1377 /*
1378  * For each symbol, try and find a match. We will attempt to find an exact
1379  * match; however, we will settle for a fuzzy match in general. There is one
1380  * case where we will not opt to use a fuzzy match, which is when performing the
1381  * deduplication of a container. In such a case we are trying to reduce common
1382  * types and a fuzzy match would be inappropriate as if we're in the context of
1383  * a single container, the conversion process should have identified any exact
1384  * or fuzzy matches that were required.
1385  */
1386 static int
1387 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1388     const char *name, boolean_t primary, void *arg)
1389 {
1390 	int err;
1391 	uint_t type, bind;
1392 	ctf_merge_symbol_arg_t *csa = arg;
1393 	ctf_file_t *fp = csa->cmsa_out;
1394 
1395 	type = ELF64_ST_TYPE(symp->st_info);
1396 	bind = ELF64_ST_BIND(symp->st_info);
1397 
1398 	ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1399 	    ELF64_ST_BIND(symp->st_info));
1400 
1401 	if (type == STT_OBJECT) {
1402 		ctf_merge_objmap_t *cmo, *match = NULL;
1403 
1404 		for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1405 		    cmo = list_next(csa->cmsa_objmap, cmo)) {
1406 			boolean_t is_fuzzy = B_FALSE;
1407 			if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1408 			    &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1409 				if (is_fuzzy && csa->cmsa_dedup &&
1410 				    bind != STB_WEAK) {
1411 					continue;
1412 				}
1413 				match = cmo;
1414 				if (is_fuzzy) {
1415 					continue;
1416 				}
1417 				break;
1418 			}
1419 		}
1420 
1421 		if (match == NULL) {
1422 			return (0);
1423 		}
1424 
1425 		if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1426 			ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1427 			    match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1428 			return (ctf_errno(fp));
1429 		}
1430 		ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1431 		    name, match->cmo_tid);
1432 	} else {
1433 		ctf_merge_funcmap_t *cmf, *match = NULL;
1434 		ctf_funcinfo_t fi;
1435 
1436 		for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1437 		    cmf = list_next(csa->cmsa_funcmap, cmf)) {
1438 			boolean_t is_fuzzy = B_FALSE;
1439 			if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1440 			    &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1441 				if (is_fuzzy && csa->cmsa_dedup &&
1442 				    bind != STB_WEAK) {
1443 					continue;
1444 				}
1445 				match = cmf;
1446 				if (is_fuzzy) {
1447 					continue;
1448 				}
1449 				break;
1450 			}
1451 		}
1452 
1453 		if (match == NULL) {
1454 			return (0);
1455 		}
1456 
1457 		fi.ctc_return = match->cmf_rtid;
1458 		fi.ctc_argc = match->cmf_argc;
1459 		fi.ctc_flags = match->cmf_flags;
1460 		if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1461 		    0) {
1462 			ctf_dprintf("Failed to add function %s: %s\n", name,
1463 			    ctf_errmsg(ctf_errno(fp)));
1464 			return (ctf_errno(fp));
1465 		}
1466 		ctf_dprintf("mapped function into output %s/%s\n", file,
1467 		    name);
1468 	}
1469 
1470 	return (0);
1471 }
1472 
1473 int
1474 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1475 {
1476 	int err, merr;
1477 	ctf_merge_input_t *cmi;
1478 	ctf_id_t ltype;
1479 	mergeq_t *mqp;
1480 	ctf_merge_input_t *final;
1481 	ctf_file_t *out;
1482 
1483 	ctf_dprintf("Beginning ctf_merge_merge()\n");
1484 	if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1485 		const char *label = ctf_label_topmost(cmh->cmh_unique);
1486 		if (label == NULL)
1487 			return (ECTF_NOLABEL);
1488 		if (strcmp(label, cmh->cmh_label) != 0)
1489 			return (ECTF_LCONFLICT);
1490 	}
1491 
1492 	if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1493 		return (errno);
1494 	}
1495 
1496 	VERIFY(cmh->cmh_ninputs % 2 == 0);
1497 	for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1498 	    cmi = list_next(&cmh->cmh_inputs, cmi)) {
1499 		if (mergeq_add(mqp, cmi) == -1) {
1500 			err = errno;
1501 			mergeq_fini(mqp);
1502 		}
1503 	}
1504 
1505 	err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1506 	mergeq_fini(mqp);
1507 
1508 	if (err == MERGEQ_ERROR) {
1509 		return (errno);
1510 	} else if (err == MERGEQ_UERROR) {
1511 		return (merr);
1512 	}
1513 
1514 	/*
1515 	 * Disassociate the generated ctf_file_t from the original input. That
1516 	 * way when the input gets cleaned up, we don't accidentally kill the
1517 	 * final reference to the ctf_file_t. If it gets uniquified then we'll
1518 	 * kill it.
1519 	 */
1520 	VERIFY(final->cmi_input != NULL);
1521 	out = final->cmi_input;
1522 	final->cmi_input = NULL;
1523 
1524 	ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1525 	if (cmh->cmh_unique != NULL) {
1526 		ctf_file_t *u;
1527 		err = ctf_uniquify_types(cmh, out, &u);
1528 		if (err != 0) {
1529 			err = ctf_errno(out);
1530 			ctf_close(out);
1531 			return (err);
1532 		}
1533 		ctf_close(out);
1534 		out = u;
1535 	}
1536 
1537 	ltype = out->ctf_typemax;
1538 	if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1539 		ltype += CTF_CHILD_START;
1540 	ctf_dprintf("trying to add the label\n");
1541 	if (cmh->cmh_label != NULL &&
1542 	    ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1543 		ctf_close(out);
1544 		return (ctf_errno(out));
1545 	}
1546 
1547 	ctf_dprintf("merging symbols and the like\n");
1548 	if (cmh->cmh_msyms == B_TRUE) {
1549 		ctf_merge_symbol_arg_t arg;
1550 		arg.cmsa_objmap = &final->cmi_omap;
1551 		arg.cmsa_funcmap = &final->cmi_fmap;
1552 		arg.cmsa_out = out;
1553 		arg.cmsa_dedup = B_FALSE;
1554 		err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1555 		if (err != 0) {
1556 			ctf_close(out);
1557 			return (err);
1558 		}
1559 	}
1560 
1561 	err = ctf_update(out);
1562 	if (err != 0) {
1563 		err = ctf_errno(out);
1564 		ctf_close(out);
1565 		return (err);
1566 	}
1567 
1568 	*outp = out;
1569 	return (0);
1570 }
1571 
1572 /*
1573  * When we get told that something is unique, eg. same is B_FALSE, then that
1574  * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1575  * want to record it in the mapping table so that we know how to redirect types
1576  * to the extant ones.
1577  */
1578 static void
1579 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1580     ctf_id_t oid, void *arg)
1581 {
1582 	ctf_merge_types_t *cmp = arg;
1583 	ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1584 
1585 	if (same == B_TRUE) {
1586 		/*
1587 		 * The output id here may itself map to something else.
1588 		 * Therefore, we need to basically walk a chain and see what it
1589 		 * points to until it itself points to a base type, eg. -1.
1590 		 * Otherwise we'll dedup to something which no longer exists.
1591 		 */
1592 		while (cmt[oid].cmt_missing == B_FALSE)
1593 			oid = cmt[oid].cmt_map;
1594 		cmt[iid].cmt_map = oid;
1595 		ctf_dprintf("dedup %d->%d \n", iid, oid);
1596 	} else {
1597 		VERIFY(cmt[iid].cmt_map == 0);
1598 		cmt[iid].cmt_missing = B_TRUE;
1599 		ctf_dprintf("dedup %d is missing\n", iid);
1600 	}
1601 }
1602 
1603 /*
1604  * Dedup a CTF container.
1605  *
1606  * DWARF and other encoding formats that we use to create CTF data may create
1607  * multiple copies of a given type. However, after doing a conversion, and
1608  * before doing a merge, we'd prefer, if possible, to have every input container
1609  * to be unique.
1610  *
1611  * Doing a deduplication is like a normal merge. However, when we diff the types
1612  * in the container, rather than doing a normal diff, we instead want to diff
1613  * against any already processed types. eg, for a given type i in a container,
1614  * we want to diff it from 0 to i - 1.
1615  */
1616 int
1617 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1618 {
1619 	int ret;
1620 	ctf_diff_t *cdp = NULL;
1621 	ctf_merge_input_t *cmi, *cmc;
1622 	ctf_file_t *ifp, *ofp;
1623 	ctf_merge_types_t cm;
1624 
1625 	if (cmp == NULL || outp == NULL)
1626 		return (EINVAL);
1627 
1628 	ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1629 	if (cmp->cmh_ninputs != 2)
1630 		return (EINVAL);
1631 
1632 	ctf_dprintf("passed argument sanity check\n");
1633 
1634 	cmi = list_head(&cmp->cmh_inputs);
1635 	VERIFY(cmi != NULL);
1636 	cmc = list_next(&cmp->cmh_inputs, cmi);
1637 	VERIFY(cmc != NULL);
1638 	ifp = cmi->cmi_input;
1639 	ofp = cmc->cmi_input;
1640 	VERIFY(ifp != NULL);
1641 	VERIFY(ofp != NULL);
1642 	cm.cm_src = ifp;
1643 	cm.cm_out = ofp;
1644 	cm.cm_dedup = B_TRUE;
1645 	cm.cm_unique = B_FALSE;
1646 
1647 	if ((ret = ctf_merge_types_init(&cm)) != 0) {
1648 		return (ret);
1649 	}
1650 
1651 	if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1652 		goto err;
1653 
1654 	ctf_dprintf("Successfully initialized dedup\n");
1655 	if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1656 		goto err;
1657 
1658 	ctf_dprintf("Successfully diffed types\n");
1659 	ret = ctf_merge_common(&cm);
1660 	ctf_dprintf("deduping types result: %d\n", ret);
1661 	if (ret == 0)
1662 		ret = ctf_update(cm.cm_out);
1663 	if (ret != 0)
1664 		goto err;
1665 
1666 	ctf_dprintf("Successfully deduped types\n");
1667 	ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1668 
1669 	/*
1670 	 * Now we need to fix up the object and function maps.
1671 	 */
1672 	ctf_merge_fixup_symmaps(&cm, cmi);
1673 
1674 	if (cmp->cmh_msyms == B_TRUE) {
1675 		ctf_merge_symbol_arg_t arg;
1676 		arg.cmsa_objmap = &cmi->cmi_omap;
1677 		arg.cmsa_funcmap = &cmi->cmi_fmap;
1678 		arg.cmsa_out = cm.cm_out;
1679 		arg.cmsa_dedup = B_TRUE;
1680 		ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1681 		if (ret != 0) {
1682 			ctf_dprintf("failed to dedup symbols: %s\n",
1683 			    ctf_errmsg(ret));
1684 			goto err;
1685 		}
1686 	}
1687 
1688 	ret = ctf_update(cm.cm_out);
1689 	if (ret == 0) {
1690 		cmc->cmi_input = NULL;
1691 		*outp = cm.cm_out;
1692 	}
1693 	ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1694 err:
1695 	ctf_merge_types_fini(&cm);
1696 	ctf_diff_fini(cdp);
1697 	return (ret);
1698 }
1699 
1700 int
1701 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1702 {
1703 	if (nthrs == 0)
1704 		return (EINVAL);
1705 	cmp->cmh_nthreads = nthrs;
1706 	return (0);
1707 }
1708