1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1988 AT&T 24 * All Rights Reserved 25 * 26 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 27 * 28 * Copyright 2019 Joyent, Inc. 29 */ 30 31 /* 32 * Map file parsing (Shared Core Code). 33 */ 34 #include <fcntl.h> 35 #include <stdio.h> 36 #include <unistd.h> 37 #include <sys/stat.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <dirent.h> 41 #include <ctype.h> 42 #include <debug.h> 43 #include "msg.h" 44 #include "_libld.h" 45 #include "_map.h" 46 47 /* 48 * There are two styles of mapfile supported by the link-editor: 49 * 50 * 1) The original System V defined syntax, as augmented at Sun 51 * from Solaris 2.0 through Solaris 10. This style is also known 52 * as version 1. 53 * 54 * 2) A newer syntax, currently at version 2. 55 * 56 * The original syntax uses special characters (=, :, -, |, etc) as 57 * operators to indicate the operation being specified. Over the years, 58 * this syntax has been problematic: 59 * 60 * 1) Too cryptic: It's hard for people to remember which character 61 * means what. 
*
 * 2)	Limited expansion potential: There are only a few special characters
 *	available on the keyboard for new features, and it is difficult to
 *	add options to existing ones.
 *
 * Adding new features into this framework (2) has the effect of
 * making the syntax even more cryptic (1). The newer syntax addresses
 * these issues by moving to an extendible identifier based syntax that
 * allows new features to be added without complicating old ones.
 *
 * The new syntax uses the following terminology:
 *
 * -	Control directives are the directives that start with a '$'.
 *	They control how the mapfile is interpreted. We use the 'cdir_'
 *	prefix on functions and variables related to these directives.
 *
 * -	Conditional Expressions are the expressions found in $if and $elif
 *	control directives. They evaluate to boolean true/false values.
 *	We use the 'cexp_' prefix for functions and variables related to
 *	these expressions.
 *
 * -	Regular Directives are names (SYMBOL, VERSION, etc) that convey
 *	directions to the link-editor for building the output object.
 *
 * This file contains core code used by both mapfile styles: File management,
 * lexical analysis, and other shared core functionality. It also contains
 * the code for control directives, as they are intrinsically part of
 * lexical analysis --- this is disabled when processing Sysv mapfiles.
 */

/*
 * We use a stack of cdir_level_t structs to manage $if/$elif/$else/$endif
 * processing. At each level, we keep track of the information needed to
 * determine whether or not to process nested input lines or skip them,
 * along with information needed to report errors.
97 */ 98 typedef struct { 99 Lineno cdl_if_lineno; /* Line number of opening $if */ 100 Lineno cdl_else_lineno; /* 0, or line on which $else seen */ 101 int cdl_done; /* True if no longer accepts input */ 102 int cdl_pass; /* True if currently accepting input */ 103 } cdir_level_t; 104 105 /* Operators in the expressions accepted by $if/$elif */ 106 typedef enum { 107 CEXP_OP_NONE, /* Not an operator */ 108 CEXP_OP_AND, /* && */ 109 CEXP_OP_OR, /* || */ 110 CEXP_OP_NEG, /* ! */ 111 CEXP_OP_OPAR, /* ( */ 112 CEXP_OP_CPAR /* ) */ 113 } cexp_op_t; 114 115 /* 116 * Type of conditional expression identifier AVL tree nodes 117 */ 118 typedef struct cexp_name_node { 119 avl_node_t ceid_avlnode; /* AVL book-keeping */ 120 const char *ceid_name; /* boolean identifier name */ 121 } cexp_id_node_t; 122 123 124 /* 125 * Declare a "stack" type, containing a pointer to data, a count of 126 * allocated, and currently used items in the stack. The data type 127 * is specified as the _type argument. 128 */ 129 #define STACK(_type) \ 130 struct { \ 131 _type *stk_s; /* Stack array */ \ 132 size_t stk_n; /* Current stack depth */ \ 133 size_t stk_n_alloc; /* # of elements pointed at by s */ \ 134 } 135 136 /* 137 * The following type represents a "generic" stack, where the data 138 * type is (void). This type is never instantiated. However, it has 139 * the same struct layout as any other STACK(), and is therefore a good 140 * generic type that can be used for stack_resize(). 141 */ 142 typedef STACK(void) generic_stack_t; 143 144 /* 145 * Ensure that the stack has enough room to push one more item 146 */ 147 #define STACK_RESERVE(_stack, _n_default) \ 148 (((_stack).stk_n < (_stack).stk_n_alloc) || \ 149 stack_resize((generic_stack_t *)&(_stack).stk_s, _n_default, \ 150 sizeof (*(_stack).stk_s))) 151 152 /* 153 * Reset a stack to empty. 154 */ 155 #define STACK_RESET(_stack) (_stack).stk_n = 0; 156 157 /* 158 * True if stack is empty, False otherwise. 
159 */ 160 #define STACK_IS_EMPTY(_stack) ((_stack).stk_n == 0) 161 162 /* 163 * Push a value onto a stack. Caller must ensure that stack has room. 164 * This macro is intended to be used as the LHS of an assignment, the 165 * RHS of which is the value: 166 * 167 * STACK_PUSH(stack) = value; 168 */ 169 #define STACK_PUSH(_stack) (_stack).stk_s[(_stack).stk_n++] 170 171 /* 172 * Pop a value off a stack. Caller must ensure 173 * that stack is not empty. 174 */ 175 #define STACK_POP(_stack) ((_stack).stk_s[--(_stack).stk_n]) 176 177 /* 178 * Access top element on stack without popping. Caller must ensure 179 * that stack is not empty. 180 */ 181 #define STACK_TOP(_stack) (((_stack).stk_s)[(_stack).stk_n - 1]) 182 183 /* 184 * Initial sizes used for the stacks: The stacks are allocated on demand 185 * to these sizes, and then doubled as necessary until they are large enough. 186 * 187 * The ideal size would be large enough that only a single allocation 188 * occurs, and our defaults should generally have that effect. However, 189 * in doing so, we run the risk of a latent error in the resize code going 190 * undetected until triggered by a large task in the field. For this reason, 191 * we set the sizes to the smallest size possible when compiled for debug. 192 */ 193 #ifdef DEBUG 194 #define CDIR_STACK_INIT 1 195 #define CEXP_OP_STACK_INIT 1 196 #define CEXP_VAL_STACK_INIT 1 197 #else 198 #define CDIR_STACK_INIT 16 199 #define CEXP_OP_STACK_INIT 8 200 #define CEXP_VAL_STACK_INIT (CEXP_OP_STACK_INIT * 2) /* 2 vals per binop */ 201 #endif 202 203 204 /* 205 * Persistent state maintained by map module in between calls. 206 * 207 * This is kept as static file scope data, because it is only used 208 * when libld is called by ld, and not by rtld. 
If that should change, 209 * the code is designed so that it can become reentrant easily: 210 * 211 * - Add a pointer to the output descriptor to a structure of this type, 212 * allocated dynamically on the first call to ld_map_parse(). 213 * - Change all references to lms to instead reference the pointer in 214 * the output descriptor. 215 * 216 * Until then, it is simpler not to expose these details. 217 */ 218 typedef struct { 219 int lms_cdir_valid; /* Allow control dir. on entry to gettoken() */ 220 STACK(cdir_level_t) lms_cdir_stack; /* Conditional input level */ 221 STACK(cexp_op_t) lms_cexp_op_stack; /* Cond. expr operators */ 222 STACK(uchar_t) lms_cexp_val_stack; /* Cond. expr values */ 223 avl_tree_t *lms_cexp_id; 224 } ld_map_state_t; 225 static ld_map_state_t lms; 226 227 228 /* 229 * Version 1 (SysV) syntax dispatch table for ld_map_gettoken(). For each 230 * of the 7-bit ASCII characters, determine how the lexical analyzer 231 * should behave. 232 * 233 * This table must be kept in sync with tkid_attr[] below. 234 * 235 * Identifier Note: 236 * The Linker and Libraries Guide states that the original syntax uses 237 * C identifier rules, allowing '.' to be treated as a letter. However, 238 * the implementation is considerably looser than that: Any character 239 * with an ASCII code (0-127) which is printable and not used to start 240 * another token is allowed to start an identifier, and they are terminated 241 * by any of: space, double quote, tab, newline, ':', ';', '=', or '#'. 242 * The original code has been replaced, but this table encodes the same 243 * rules, to ensure backward compatibility. 
*/
static const mf_tokdisp_t gettok_dispatch_v1 = {
	/* 0-31: control characters. Only HT, NL, VT, FF and CR are legal */
	TK_OP_EOF,			/* 0 - NUL */
	TK_OP_ILLCHR,			/* 1 - SOH */
	TK_OP_ILLCHR,			/* 2 - STX */
	TK_OP_ILLCHR,			/* 3 - ETX */
	TK_OP_ILLCHR,			/* 4 - EOT */
	TK_OP_ILLCHR,			/* 5 - ENQ */
	TK_OP_ILLCHR,			/* 6 - ACK */
	TK_OP_ILLCHR,			/* 7 - BEL */
	TK_OP_ILLCHR,			/* 8 - BS */
	TK_OP_WS,			/* 9 - HT */
	TK_OP_NL,			/* 10 - NL */
	TK_OP_WS,			/* 11 - VT */
	TK_OP_WS,			/* 12 - FF */
	TK_OP_WS,			/* 13 - CR */
	TK_OP_ILLCHR,			/* 14 - SO */
	TK_OP_ILLCHR,			/* 15 - SI */
	TK_OP_ILLCHR,			/* 16 - DLE */
	TK_OP_ILLCHR,			/* 17 - DC1 */
	TK_OP_ILLCHR,			/* 18 - DC2 */
	TK_OP_ILLCHR,			/* 19 - DC3 */
	TK_OP_ILLCHR,			/* 20 - DC4 */
	TK_OP_ILLCHR,			/* 21 - NAK */
	TK_OP_ILLCHR,			/* 22 - SYN */
	TK_OP_ILLCHR,			/* 23 - ETB */
	TK_OP_ILLCHR,			/* 24 - CAN */
	TK_OP_ILLCHR,			/* 25 - EM */
	TK_OP_ILLCHR,			/* 26 - SUB */
	TK_OP_ILLCHR,			/* 27 - ESC */
	TK_OP_ILLCHR,			/* 28 - FS */
	TK_OP_ILLCHR,			/* 29 - GS */
	TK_OP_ILLCHR,			/* 30 - RS */
	TK_OP_ILLCHR,			/* 31 - US */

	/*
	 * 32-126: printable characters. Anything that does not begin
	 * another token begins an identifier.
	 */
	TK_OP_WS,			/* 32 - SP */
	TK_OP_ID,			/* 33 - ! */
	TK_OP_SIMQUOTE,			/* 34 - " */
	TK_OP_CMT,			/* 35 - # */
	TK_OP_ID,			/* 36 - $ */
	TK_OP_ID,			/* 37 - % */
	TK_OP_ID,			/* 38 - & */
	TK_OP_ID,			/* 39 - ' */
	TK_OP_ID,			/* 40 - ( */
	TK_OP_ID,			/* 41 - ) */
	TK_OP_ID,			/* 42 - * */
	TK_OP_ID,			/* 43 - + */
	TK_OP_ID,			/* 44 - , */
	TK_DASH,			/* 45 - - */
	TK_OP_ID,			/* 46 - . */
	TK_OP_ID,			/* 47 - / */
	TK_OP_ID,			/* 48 - 0 */
	TK_OP_ID,			/* 49 - 1 */
	TK_OP_ID,			/* 50 - 2 */
	TK_OP_ID,			/* 51 - 3 */
	TK_OP_ID,			/* 52 - 4 */
	TK_OP_ID,			/* 53 - 5 */
	TK_OP_ID,			/* 54 - 6 */
	TK_OP_ID,			/* 55 - 7 */
	TK_OP_ID,			/* 56 - 8 */
	TK_OP_ID,			/* 57 - 9 */
	TK_COLON,			/* 58 - : */
	TK_SEMICOLON,			/* 59 - ; */
	TK_OP_ID,			/* 60 - < */
	TK_EQUAL,			/* 61 - = */
	TK_OP_ID,			/* 62 - > */
	TK_OP_ID,			/* 63 - ? */
	TK_ATSIGN,			/* 64 - @ */
	TK_OP_ID,			/* 65 - A */
	TK_OP_ID,			/* 66 - B */
	TK_OP_ID,			/* 67 - C */
	TK_OP_ID,			/* 68 - D */
	TK_OP_ID,			/* 69 - E */
	TK_OP_ID,			/* 70 - F */
	TK_OP_ID,			/* 71 - G */
	TK_OP_ID,			/* 72 - H */
	TK_OP_ID,			/* 73 - I */
	TK_OP_ID,			/* 74 - J */
	TK_OP_ID,			/* 75 - K */
	TK_OP_ID,			/* 76 - L */
	TK_OP_ID,			/* 77 - M */
	TK_OP_ID,			/* 78 - N */
	TK_OP_ID,			/* 79 - O */
	TK_OP_ID,			/* 80 - P */
	TK_OP_ID,			/* 81 - Q */
	TK_OP_ID,			/* 82 - R */
	TK_OP_ID,			/* 83 - S */
	TK_OP_ID,			/* 84 - T */
	TK_OP_ID,			/* 85 - U */
	TK_OP_ID,			/* 86 - V */
	TK_OP_ID,			/* 87 - W */
	TK_OP_ID,			/* 88 - X */
	TK_OP_ID,			/* 89 - Y */
	TK_OP_ID,			/* 90 - Z */
	TK_OP_ID,			/* 91 - [ */
	TK_OP_ID,			/* 92 - \ */
	TK_OP_ID,			/* 93 - ] */
	TK_OP_ID,			/* 94 - ^ */
	TK_OP_ID,			/* 95 - _ */
	TK_OP_ID,			/* 96 - ` */
	TK_OP_ID,			/* 97 - a */
	TK_OP_ID,			/* 98 - b */
	TK_OP_ID,			/* 99 - c */
	TK_OP_ID,			/* 100 - d */
	TK_OP_ID,			/* 101 - e */
	TK_OP_ID,			/* 102 - f */
	TK_OP_ID,			/* 103 - g */
	TK_OP_ID,			/* 104 - h */
	TK_OP_ID,			/* 105 - i */
	TK_OP_ID,			/* 106 - j */
	TK_OP_ID,			/* 107 - k */
	TK_OP_ID,			/* 108 - l */
	TK_OP_ID,			/* 109 - m */
	TK_OP_ID,			/* 110 - n */
	TK_OP_ID,			/* 111 - o */
	TK_OP_ID,			/* 112 - p */
	TK_OP_ID,			/* 113 - q */
	TK_OP_ID,			/* 114 - r */
	TK_OP_ID,			/* 115 - s */
	TK_OP_ID,			/* 116 - t */
	TK_OP_ID,			/* 117 - u */
	TK_OP_ID,			/* 118 - v */
	TK_OP_ID,			/* 119 - w */
	TK_OP_ID,			/* 120 - x */
	TK_OP_ID,			/* 121 - y */
	TK_OP_ID,			/* 122 - z */
	TK_LEFTBKT,			/* 123 - { */
	TK_PIPE,			/* 124 - | */
	TK_RIGHTBKT,			/* 125 - } */
	TK_OP_ID,			/* 126 - ~ */
	TK_OP_ILLCHR,			/* 127 - DEL */
};

/*
 * Version 2 syntax dispatch table for ld_map_gettoken(). For each of the
 * 7-bit ASCII characters, determine how the lexical analyzer should behave.
*
 * This table must be kept in sync with tkid_attr[] below.
 *
 * Identifier Note:
 * We define a letter as being one of the character [A-Z], [a-z], or [_%/.]
 * A digit is the numbers [0-9], or [$-]. An unquoted identifier is defined
 * as a letter, followed by any number of letters or digits. This is a loosened
 * version of the C definition of an identifier. The extra characters not
 * allowed by C are common in section names and/or file paths.
 */
static const mf_tokdisp_t gettok_dispatch_v2 = {
	/* 0-31: control characters. Only HT, NL, VT, FF and CR are legal */
	TK_OP_EOF,			/* 0 - NUL */
	TK_OP_ILLCHR,			/* 1 - SOH */
	TK_OP_ILLCHR,			/* 2 - STX */
	TK_OP_ILLCHR,			/* 3 - ETX */
	TK_OP_ILLCHR,			/* 4 - EOT */
	TK_OP_ILLCHR,			/* 5 - ENQ */
	TK_OP_ILLCHR,			/* 6 - ACK */
	TK_OP_ILLCHR,			/* 7 - BEL */
	TK_OP_ILLCHR,			/* 8 - BS */
	TK_OP_WS,			/* 9 - HT */
	TK_OP_NL,			/* 10 - NL */
	TK_OP_WS,			/* 11 - VT */
	TK_OP_WS,			/* 12 - FF */
	TK_OP_WS,			/* 13 - CR */
	TK_OP_ILLCHR,			/* 14 - SO */
	TK_OP_ILLCHR,			/* 15 - SI */
	TK_OP_ILLCHR,			/* 16 - DLE */
	TK_OP_ILLCHR,			/* 17 - DC1 */
	TK_OP_ILLCHR,			/* 18 - DC2 */
	TK_OP_ILLCHR,			/* 19 - DC3 */
	TK_OP_ILLCHR,			/* 20 - DC4 */
	TK_OP_ILLCHR,			/* 21 - NAK */
	TK_OP_ILLCHR,			/* 22 - SYN */
	TK_OP_ILLCHR,			/* 23 - ETB */
	TK_OP_ILLCHR,			/* 24 - CAN */
	TK_OP_ILLCHR,			/* 25 - EM */
	TK_OP_ILLCHR,			/* 26 - SUB */
	TK_OP_ILLCHR,			/* 27 - ESC */
	TK_OP_ILLCHR,			/* 28 - FS */
	TK_OP_ILLCHR,			/* 29 - GS */
	TK_OP_ILLCHR,			/* 30 - RS */
	TK_OP_ILLCHR,			/* 31 - US */

	/* 32-126: printable characters */
	TK_OP_WS,			/* 32 - SP */
	TK_BANG,			/* 33 - ! */
	TK_OP_CQUOTE,			/* 34 - " */
	TK_OP_CMT,			/* 35 - # */
	TK_OP_CDIR,			/* 36 - $ */
	TK_OP_ID,			/* 37 - % */
	TK_OP_BADCHR,			/* 38 - & */
	TK_OP_SIMQUOTE,			/* 39 - ' */
	TK_OP_BADCHR,			/* 40 - ( */
	TK_OP_BADCHR,			/* 41 - ) */
	TK_STAR,			/* 42 - * */
	TK_OP_CEQUAL,			/* 43 - + */
	TK_OP_BADCHR,			/* 44 - , */
	TK_OP_CEQUAL,			/* 45 - - */
	TK_OP_ID,			/* 46 - . */
	TK_OP_ID,			/* 47 - / */
	TK_OP_NUM,			/* 48 - 0 */
	TK_OP_NUM,			/* 49 - 1 */
	TK_OP_NUM,			/* 50 - 2 */
	TK_OP_NUM,			/* 51 - 3 */
	TK_OP_NUM,			/* 52 - 4 */
	TK_OP_NUM,			/* 53 - 5 */
	TK_OP_NUM,			/* 54 - 6 */
	TK_OP_NUM,			/* 55 - 7 */
	TK_OP_NUM,			/* 56 - 8 */
	TK_OP_NUM,			/* 57 - 9 */
	TK_COLON,			/* 58 - : */
	TK_SEMICOLON,			/* 59 - ; */
	TK_OP_BADCHR,			/* 60 - < */
	TK_EQUAL,			/* 61 - = */
	TK_OP_BADCHR,			/* 62 - > */
	TK_OP_BADCHR,			/* 63 - ? */
	TK_OP_BADCHR,			/* 64 - @ */
	TK_OP_ID,			/* 65 - A */
	TK_OP_ID,			/* 66 - B */
	TK_OP_ID,			/* 67 - C */
	TK_OP_ID,			/* 68 - D */
	TK_OP_ID,			/* 69 - E */
	TK_OP_ID,			/* 70 - F */
	TK_OP_ID,			/* 71 - G */
	TK_OP_ID,			/* 72 - H */
	TK_OP_ID,			/* 73 - I */
	TK_OP_ID,			/* 74 - J */
	TK_OP_ID,			/* 75 - K */
	TK_OP_ID,			/* 76 - L */
	TK_OP_ID,			/* 77 - M */
	TK_OP_ID,			/* 78 - N */
	TK_OP_ID,			/* 79 - O */
	TK_OP_ID,			/* 80 - P */
	TK_OP_ID,			/* 81 - Q */
	TK_OP_ID,			/* 82 - R */
	TK_OP_ID,			/* 83 - S */
	TK_OP_ID,			/* 84 - T */
	TK_OP_ID,			/* 85 - U */
	TK_OP_ID,			/* 86 - V */
	TK_OP_ID,			/* 87 - W */
	TK_OP_ID,			/* 88 - X */
	TK_OP_ID,			/* 89 - Y */
	TK_OP_ID,			/* 90 - Z */
	TK_LEFTSQR,			/* 91 - [ */
	TK_OP_BADCHR,			/* 92 - \ */
	TK_RIGHTSQR,			/* 93 - ] */
	TK_OP_BADCHR,			/* 94 - ^ */
	TK_OP_ID,			/* 95 - _ */
	TK_OP_BADCHR,			/* 96 - ` */
	TK_OP_ID,			/* 97 - a */
	TK_OP_ID,			/* 98 - b */
	TK_OP_ID,			/* 99 - c */
	TK_OP_ID,			/* 100 - d */
	TK_OP_ID,			/* 101 - e */
	TK_OP_ID,			/* 102 - f */
	TK_OP_ID,			/* 103 - g */
	TK_OP_ID,			/* 104 - h */
	TK_OP_ID,			/* 105 - i */
	TK_OP_ID,			/* 106 - j */
	TK_OP_ID,			/* 107 - k */
	TK_OP_ID,			/* 108 - l */
	TK_OP_ID,			/* 109 - m */
	TK_OP_ID,			/* 110 - n */
	TK_OP_ID,			/* 111 - o */
	TK_OP_ID,			/* 112 - p */
	TK_OP_ID,			/* 113 - q */
	TK_OP_ID,			/* 114 - r */
	TK_OP_ID,			/* 115 - s */
	TK_OP_ID,			/* 116 - t */
	TK_OP_ID,			/* 117 - u */
	TK_OP_ID,			/* 118 - v */
	TK_OP_ID,			/* 119 - w */
	TK_OP_ID,			/* 120 - x */
	TK_OP_ID,			/* 121 - y */
	TK_OP_ID,			/* 122 - z */
	TK_LEFTBKT,			/* 123 - { */
	TK_OP_BADCHR,			/* 124 - | */
	TK_RIGHTBKT,			/* 125 - } */
	TK_OP_BADCHR,			/* 126 - ~ */
	TK_OP_ILLCHR,			/* 127 - DEL */
};


/*
 * Table used to identify unquoted identifiers. Each element of this array
 * contains a bitmask indicating whether the character it represents starts,
 * or continues an identifier, for each supported mapfile syntax version.
 *
 * The table is indexed by 7-bit ASCII code, and is tested against the
 * mf_tkid_start and mf_tkid_cont masks of the mapfile descriptor. The
 * TKID_ATTR() and TKID_ATTR_CONT() macros are defined outside this file;
 * presumably TKID_ATTR(v) marks a character that can both start and
 * continue an identifier in syntax version v, while TKID_ATTR_CONT(v)
 * marks one that can only continue it --- TODO confirm against _map.h.
 *
 * NOTE(review): The version 2 dispatch table comment lists '%' among the
 * letters, and that table dispatches '%' as TK_OP_ID. Here, however, '%'
 * is TKID_ATTR_CONT(2), unlike '_', '.' and '/', which are TKID_ATTR(2).
 * Confirm which of the two is intended.
 */
static const char tkid_attr[128] = {
	0,						/* 0 - NUL */
	TKID_ATTR_CONT(1),				/* 1 - SOH */
	TKID_ATTR_CONT(1),				/* 2 - STX */
	TKID_ATTR_CONT(1),				/* 3 - ETX */
	TKID_ATTR_CONT(1),				/* 4 - EOT */
	TKID_ATTR_CONT(1),				/* 5 - ENQ */
	TKID_ATTR_CONT(1),				/* 6 - ACK */
	TKID_ATTR_CONT(1),				/* 7 - BEL */
	TKID_ATTR_CONT(1),				/* 8 - BS */
	0,						/* 9 - HT */
	0,						/* 10 - NL */
	TKID_ATTR_CONT(1),				/* 11 - VT */
	TKID_ATTR_CONT(1),				/* 12 - FF */
	TKID_ATTR_CONT(1),				/* 13 - CR */
	TKID_ATTR_CONT(1),				/* 14 - SO */
	TKID_ATTR_CONT(1),				/* 15 - SI */
	TKID_ATTR_CONT(1),				/* 16 - DLE */
	TKID_ATTR_CONT(1),				/* 17 - DC1 */
	TKID_ATTR_CONT(1),				/* 18 - DC2 */
	TKID_ATTR_CONT(1),				/* 19 - DC3 */
	TKID_ATTR_CONT(1),				/* 20 - DC4 */
	TKID_ATTR_CONT(1),				/* 21 - NAK */
	TKID_ATTR_CONT(1),				/* 22 - SYN */
	TKID_ATTR_CONT(1),				/* 23 - ETB */
	TKID_ATTR_CONT(1),				/* 24 - CAN */
	TKID_ATTR_CONT(1),				/* 25 - EM */
	TKID_ATTR_CONT(1),				/* 26 - SUB */
	TKID_ATTR_CONT(1),				/* 27 - ESC */
	TKID_ATTR_CONT(1),				/* 28 - FS */
	TKID_ATTR_CONT(1),				/* 29 - GS */
	TKID_ATTR_CONT(1),				/* 30 - RS */
	TKID_ATTR_CONT(1),				/* 31 - US */
	0,						/* 32 - SP */
	TKID_ATTR(1),					/* 33 - ! */
	0,						/* 34 - " */
	0,						/* 35 - # */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 36 - $ */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 37 - % */
	TKID_ATTR(1),					/* 38 - & */
	TKID_ATTR(1),					/* 39 - ' */
	TKID_ATTR(1),					/* 40 - ( */
	TKID_ATTR(1),					/* 41 - ) */
	TKID_ATTR(1),					/* 42 - * */
	TKID_ATTR(1),					/* 43 - + */
	TKID_ATTR(1),					/* 44 - , */
	TKID_ATTR_CONT(1) | TKID_ATTR_CONT(2),		/* 45 - - */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 46 - . */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 47 - / */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 48 - 0 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 49 - 1 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 50 - 2 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 51 - 3 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 52 - 4 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 53 - 5 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 54 - 6 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 55 - 7 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 56 - 8 */
	TKID_ATTR(1) | TKID_ATTR_CONT(2),		/* 57 - 9 */
	0,						/* 58 - : */
	0,						/* 59 - ; */
	TKID_ATTR(1),					/* 60 - < */
	0,						/* 61 - = */
	TKID_ATTR(1),					/* 62 - > */
	TKID_ATTR(1),					/* 63 - ? */
	TKID_ATTR_CONT(1),				/* 64 - @ */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 65 - A */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 66 - B */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 67 - C */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 68 - D */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 69 - E */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 70 - F */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 71 - G */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 72 - H */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 73 - I */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 74 - J */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 75 - K */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 76 - L */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 77 - M */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 78 - N */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 79 - O */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 80 - P */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 81 - Q */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 82 - R */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 83 - S */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 84 - T */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 85 - U */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 86 - V */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 87 - W */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 88 - X */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 89 - Y */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 90 - Z */
	TKID_ATTR(1),					/* 91 - [ */
	TKID_ATTR(1),					/* 92 - \ */
	TKID_ATTR(1),					/* 93 - ] */
	TKID_ATTR(1),					/* 94 - ^ */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 95 - _ */
	TKID_ATTR(1),					/* 96 - ` */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 97 - a */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 98 - b */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 99 - c */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 100 - d */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 101 - e */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 102 - f */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 103 - g */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 104 - h */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 105 - i */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 106 - j */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 107 - k */
	TKID_ATTR(1) | TKID_ATTR(2),			/* 108 - l */
636 TKID_ATTR(1) | TKID_ATTR(2), /* 109 - m */ 637 TKID_ATTR(1) | TKID_ATTR(2), /* 110 - n */ 638 TKID_ATTR(1) | TKID_ATTR(2), /* 111 - o */ 639 TKID_ATTR(1) | TKID_ATTR(2), /* 112 - p */ 640 TKID_ATTR(1) | TKID_ATTR(2), /* 113 - q */ 641 TKID_ATTR(1) | TKID_ATTR(2), /* 114 - r */ 642 TKID_ATTR(1) | TKID_ATTR(2), /* 115 - s */ 643 TKID_ATTR(1) | TKID_ATTR(2), /* 116 - t */ 644 TKID_ATTR(1) | TKID_ATTR(2), /* 117 - u */ 645 TKID_ATTR(1) | TKID_ATTR(2), /* 118 - v */ 646 TKID_ATTR(1) | TKID_ATTR(2), /* 119 - w */ 647 TKID_ATTR(1) | TKID_ATTR(2), /* 120 - x */ 648 TKID_ATTR(1) | TKID_ATTR(2), /* 121 - y */ 649 TKID_ATTR(1) | TKID_ATTR(2), /* 122 - z */ 650 TKID_ATTR_CONT(1), /* 123 - { */ 651 TKID_ATTR_CONT(1), /* 124 - | */ 652 TKID_ATTR_CONT(1), /* 125 - } */ 653 TKID_ATTR(1), /* 126 - ~ */ 654 TKID_ATTR_CONT(1), /* 127 - DEL */ 655 }; 656 657 658 /* 659 * Advance the given string pointer to the next newline character, 660 * or the terminating NULL if there is none. 661 */ 662 inline static void 663 advance_to_eol(char **str) 664 { 665 char *s = *str; 666 667 while ((*s != '\n') && (*s != '\0')) 668 s++; 669 *str = s; 670 } 671 672 /* 673 * Insert a NULL patch at the given address 674 */ 675 inline static void 676 null_patch_set(char *str, ld_map_npatch_t *np) 677 { 678 np->np_ptr = str; 679 np->np_ch = *str; 680 *str = '\0'; 681 } 682 683 /* 684 * Undo a NULL patch 685 */ 686 inline static void 687 null_patch_undo(ld_map_npatch_t *np) 688 { 689 *np->np_ptr = np->np_ch; 690 } 691 692 /* 693 * Insert a NULL patch at the end of the line containing str. 694 */ 695 static void 696 null_patch_eol(char *str, ld_map_npatch_t *np) 697 { 698 advance_to_eol(&str); 699 null_patch_set(str, np); 700 } 701 702 /* 703 * Locate the end of an unquoted identifier. 704 * 705 * entry: 706 * mf - Mapfile descriptor, positioned to first character 707 * of identifier. 708 * 709 * exit: 710 * If the item pointed at by mf is not an identifier, returns NULL. 
711 * Otherwise, returns pointer to character after the last character 712 * of the identifier. 713 */ 714 inline static char * 715 ident_delimit(Mapfile *mf) 716 { 717 char *str = mf->mf_next; 718 ld_map_npatch_t np; 719 int c = *str++; 720 721 /* If not a valid start character, report the error */ 722 if ((c & 0x80) || !(tkid_attr[c] & mf->mf_tkid_start)) { 723 null_patch_set(str, &np); 724 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), str); 725 null_patch_undo(&np); 726 return (NULL); 727 } 728 729 /* Keep going until we hit a non-continuing character */ 730 for (c = *str; !(c & 0x80) && (tkid_attr[c] & mf->mf_tkid_cont); 731 c = *++str) 732 ; 733 734 return (str); 735 } 736 737 /* 738 * Allocate memory for a stack. 739 * 740 * entry: 741 * stack - Pointer to stack for which memory is required, cast 742 * to the generic stack type. 743 * n_default - Size to use for initial allocation. 744 * elt_size - sizeof(elt), where elt is the actual stack data type. 745 * 746 * exit: 747 * Returns (1) on success. On error (memory allocation), a message 748 * is printed and False (0) is returned. 749 * 750 * note: 751 * The caller casts the pointer to their actual datatype-specific stack 752 * to be a (generic_stack_t *). The C language will give all stack 753 * structs the same size and layout as long as the underlying platform 754 * uses a single integral type for pointers. Hence, this cast is safe, 755 * and lets a generic routine modify data-specific types without being 756 * aware of those types. 757 */ 758 static Boolean 759 stack_resize(generic_stack_t *stack, size_t n_default, size_t elt_size) 760 { 761 size_t new_n_alloc; 762 void *newaddr; 763 764 /* Use initial size first, and double the allocation on each call */ 765 new_n_alloc = (stack->stk_n_alloc == 0) ? 
766 n_default : (stack->stk_n_alloc * 2); 767 768 newaddr = libld_realloc(stack->stk_s, new_n_alloc * elt_size); 769 if (newaddr == NULL) 770 return (FALSE); 771 772 stack->stk_s = newaddr; 773 stack->stk_n_alloc = new_n_alloc; 774 return (TRUE); 775 } 776 777 /* 778 * AVL comparison function for cexp_id_node_t items. 779 * 780 * entry: 781 * n1, n2 - pointers to nodes to be compared 782 * 783 * exit: 784 * Returns -1 if (n1 < n2), 0 if they are equal, and 1 if (n1 > n2) 785 */ 786 static int 787 cexp_ident_cmp(const void *n1, const void *n2) 788 { 789 int rc; 790 791 rc = strcmp(((cexp_id_node_t *)n1)->ceid_name, 792 ((cexp_id_node_t *)n2)->ceid_name); 793 794 if (rc > 0) 795 return (1); 796 if (rc < 0) 797 return (-1); 798 return (0); 799 } 800 801 802 /* 803 * Returns True (1) if name is in the conditional expression identifier 804 * AVL tree, and False (0) otherwise. 805 */ 806 static int 807 cexp_ident_test(const char *name) 808 { 809 cexp_id_node_t node; 810 811 node.ceid_name = name; 812 return (avl_find(lms.lms_cexp_id, &node, 0) != NULL); 813 } 814 815 /* 816 * Add a new boolean identifier to the conditional expression identifier 817 * AVL tree. 818 * 819 * entry: 820 * mf - If non-NULL, the mapfile descriptor for the mapfile 821 * containing the $add directive. NULL if this is an 822 * initialization call. 823 * name - Name of identifier. Must point at stable storage that will 824 * not be moved or modified by the caller following this call. 825 * 826 * exit: 827 * On success, True (1) is returned and name has been entered. 828 * On failure, False (0) is returned and an error has been printed. 
829 */ 830 static int 831 cexp_ident_add(Mapfile *mf, const char *name) 832 { 833 cexp_id_node_t *node; 834 835 if (mf != NULL) { 836 DBG_CALL(Dbg_map_cexp_id(mf->mf_ofl->ofl_lml, 1, 837 mf->mf_name, mf->mf_lineno, name)); 838 839 /* If is already known, don't do it again */ 840 if (cexp_ident_test(name)) 841 return (1); 842 } 843 844 if ((node = libld_calloc(1, sizeof (*node))) == NULL) 845 return (0); 846 node->ceid_name = name; 847 avl_add(lms.lms_cexp_id, node); 848 return (1); 849 } 850 851 /* 852 * Remove a boolean identifier from the conditional expression identifier 853 * AVL tree. 854 * 855 * entry: 856 * mf - Mapfile descriptor 857 * name - Name of identifier. 858 * 859 * exit: 860 * If the name was in the tree, it has been removed. If not, 861 * then this routine quietly returns. 862 */ 863 static void 864 cexp_ident_clear(Mapfile *mf, const char *name) 865 { 866 cexp_id_node_t node; 867 cexp_id_node_t *real_node; 868 869 DBG_CALL(Dbg_map_cexp_id(mf->mf_ofl->ofl_lml, 0, 870 mf->mf_name, mf->mf_lineno, name)); 871 872 node.ceid_name = name; 873 real_node = avl_find(lms.lms_cexp_id, &node, 0); 874 if (real_node != NULL) 875 avl_remove(lms.lms_cexp_id, real_node); 876 } 877 878 /* 879 * Initialize the AVL tree that holds the names of the currently defined 880 * boolean identifiers for conditional expressions ($if/$elif). 881 * 882 * entry: 883 * ofl - Output file descriptor 884 * 885 * exit: 886 * On success, TRUE (1) is returned and lms.lms_cexp_id is ready for use. 887 * On failure, FALSE (0) is returned. 
888 */ 889 static Boolean 890 cexp_ident_init(void) 891 { 892 /* If already done, use it */ 893 if (lms.lms_cexp_id != NULL) 894 return (TRUE); 895 896 lms.lms_cexp_id = libld_calloc(1, sizeof (*lms.lms_cexp_id)); 897 if (lms.lms_cexp_id == NULL) 898 return (FALSE); 899 avl_create(lms.lms_cexp_id, cexp_ident_cmp, sizeof (cexp_id_node_t), 900 SGSOFFSETOF(cexp_id_node_t, ceid_avlnode)); 901 902 903 /* ELFCLASS */ 904 if (cexp_ident_add(NULL, (ld_targ.t_m.m_class == ELFCLASS32) ? 905 MSG_ORIG(MSG_STR_UELF32) : MSG_ORIG(MSG_STR_UELF64)) == 0) 906 return (FALSE); 907 908 /* Machine */ 909 switch (ld_targ.t_m.m_mach) { 910 case EM_386: 911 case EM_AMD64: 912 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_UX86)) == 0) 913 return (FALSE); 914 break; 915 916 case EM_SPARC: 917 case EM_SPARCV9: 918 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_USPARC)) == 0) 919 return (FALSE); 920 break; 921 } 922 923 /* true is always defined */ 924 if (cexp_ident_add(NULL, MSG_ORIG(MSG_STR_TRUE)) == 0) 925 return (FALSE); 926 927 return (TRUE); 928 } 929 930 /* 931 * Validate the string starting at mf->mf_next as being a 932 * boolean conditional expression identifier. 933 * 934 * entry: 935 * mf - Mapfile descriptor 936 * len - NULL, or address of variable to receive strlen() of identifier 937 * directive - If (len == NULL), string giving name of directive being 938 * processed. Ignored if (len != NULL). 939 * 940 * exit: 941 * On success: 942 * - If len is NULL, a NULL is inserted following the final 943 * character of the identifier, and the remainder of the string 944 * is tested to ensure it is empty, or only contains whitespace. 945 * - If len is non-NULL, *len is set to the number of characters 946 * in the identifier, and the rest of the string is not modified. 947 * - TRUE (1) is returned 948 * 949 * On failure, returns FALSE (0). 
950 */ 951 static Boolean 952 cexp_ident_validate(Mapfile *mf, size_t *len, const char *directive) 953 { 954 char *tail; 955 956 if ((tail = ident_delimit(mf)) == NULL) 957 return (FALSE); 958 959 /* 960 * If len is non-NULL, we simple count the number of characters 961 * consumed by the identifier and are done. If len is NULL, then 962 * ensure there's nothing left but whitespace, and NULL terminate 963 * the identifier to remove it. 964 */ 965 if (len != NULL) { 966 *len = tail - mf->mf_next; 967 } else if (*tail != '\0') { 968 *tail++ = '\0'; 969 while (isspace(*tail)) 970 tail++; 971 if (*tail != '\0') { 972 mf_fatal(mf, MSG_INTL(MSG_MAP_BADEXTRA), directive); 973 return (FALSE); 974 } 975 } 976 977 return (TRUE); 978 } 979 980 /* 981 * Push a new operator onto the conditional expression operator stack. 982 * 983 * entry: 984 * mf - Mapfile descriptor 985 * op - Operator to push 986 * 987 * exit: 988 * On success, TRUE (1) is returned, otherwise FALSE (0). 989 */ 990 static Boolean 991 cexp_push_op(cexp_op_t op) 992 { 993 if (STACK_RESERVE(lms.lms_cexp_op_stack, CEXP_OP_STACK_INIT) == 0) 994 return (FALSE); 995 996 STACK_PUSH(lms.lms_cexp_op_stack) = op; 997 return (TRUE); 998 } 999 1000 /* 1001 * Evaluate the basic operator (non-paren) at the top of lms.lms_cexp_op_stack, 1002 * and push the results on lms.lms_cexp_val_stack. 1003 * 1004 * exit: 1005 * On success, returns TRUE (1). On error, FALSE (0) is returned, 1006 * and the caller is responsible for issuing the error. 
1007 */ 1008 static Boolean 1009 cexp_eval_op(void) 1010 { 1011 cexp_op_t op; 1012 uchar_t val; 1013 1014 op = STACK_POP(lms.lms_cexp_op_stack); 1015 switch (op) { 1016 case CEXP_OP_AND: 1017 if (lms.lms_cexp_val_stack.stk_n < 2) 1018 return (FALSE); 1019 val = STACK_POP(lms.lms_cexp_val_stack); 1020 STACK_TOP(lms.lms_cexp_val_stack) = val && 1021 STACK_TOP(lms.lms_cexp_val_stack); 1022 break; 1023 1024 case CEXP_OP_OR: 1025 if (lms.lms_cexp_val_stack.stk_n < 2) 1026 return (FALSE); 1027 val = STACK_POP(lms.lms_cexp_val_stack); 1028 STACK_TOP(lms.lms_cexp_val_stack) = val || 1029 STACK_TOP(lms.lms_cexp_val_stack); 1030 break; 1031 1032 case CEXP_OP_NEG: 1033 if (lms.lms_cexp_val_stack.stk_n < 1) 1034 return (FALSE); 1035 STACK_TOP(lms.lms_cexp_val_stack) = 1036 !STACK_TOP(lms.lms_cexp_val_stack); 1037 break; 1038 default: 1039 return (FALSE); 1040 } 1041 1042 return (TRUE); 1043 } 1044 1045 /* 1046 * Evaluate an expression for a $if/$elif control directive. 1047 * 1048 * entry: 1049 * mf - Mapfile descriptor for NULL terminated string 1050 * containing the expression. 1051 * 1052 * exit: 1053 * The contents of str are modified by this routine. 1054 * One of the following values are returned: 1055 * -1 Syntax error encountered (an error is printed) 1056 * 0 The expression evaluates to False 1057 * 1 The expression evaluates to True. 1058 * 1059 * note: 1060 * A simplified version of Dijkstra's Shunting Yard algorithm is used 1061 * to convert this syntax into postfix form and then evaluate it. 1062 * Our version has no functions and a tiny set of operators. 1063 * 1064 * The expressions consist of boolean identifiers, which can be 1065 * combined using the following operators, listed from highest 1066 * precedence to least: 1067 * 1068 * Operator Meaning 1069 * ------------------------------------------------- 1070 * (expr) sub-expression, non-associative 1071 * ! 
logical negation, prefix, left associative 1072 * && || logical and/or, binary, left associative 1073 * 1074 * The operands manipulated by these operators are names, consisting of 1075 * a sequence of letters and digits. The first character must be a letter. 1076 * Underscore (_) and period (.) are also considered to be characters. 1077 * An operand is considered True if it is found in our set of known 1078 * names (lms.lms_cexp_id), and False otherwise. 1079 * 1080 * The Shunting Yard algorithm works using two stacks, one for operators, 1081 * and a second for operands. The infix input expression is tokenized from 1082 * left to right and processed in order. Issues of associativity and 1083 * precedence are managed by reducing (poping and evaluating) items with 1084 * higer precedence before pushing additional tokens with lower precedence. 1085 */ 1086 static int 1087 cexp_eval_expr(Mapfile *mf) 1088 { 1089 char *ident; 1090 size_t len; 1091 cexp_op_t new_op = CEXP_OP_AND; /* to catch binop at start */ 1092 ld_map_npatch_t np; 1093 char *str = mf->mf_next; 1094 1095 STACK_RESET(lms.lms_cexp_op_stack); 1096 STACK_RESET(lms.lms_cexp_val_stack); 1097 1098 for (; *str; str++) { 1099 1100 /* Skip whitespace */ 1101 while (isspace(*str)) 1102 str++; 1103 if (!*str) 1104 break; 1105 1106 switch (*str) { 1107 case '&': 1108 case '|': 1109 if (*(str + 1) != *str) 1110 goto token_error; 1111 if ((new_op != CEXP_OP_NONE) && 1112 (new_op != CEXP_OP_CPAR)) { 1113 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_BADOPUSE)); 1114 return (-1); 1115 } 1116 str++; 1117 1118 /* 1119 * As this is a left associative binary operator, we 1120 * need to process all operators of equal or higher 1121 * precedence before pushing the new operator. 
1122 */ 1123 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack)) { 1124 cexp_op_t op = STACK_TOP(lms.lms_cexp_op_stack); 1125 1126 1127 if ((op != CEXP_OP_AND) && (op != CEXP_OP_OR) && 1128 (op != CEXP_OP_NEG)) 1129 break; 1130 1131 if (!cexp_eval_op()) 1132 goto semantic_error; 1133 } 1134 1135 new_op = (*str == '&') ? CEXP_OP_AND : CEXP_OP_OR; 1136 if (!cexp_push_op(new_op)) 1137 return (-1); 1138 break; 1139 1140 case '!': 1141 new_op = CEXP_OP_NEG; 1142 if (!cexp_push_op(new_op)) 1143 return (-1); 1144 break; 1145 1146 case '(': 1147 new_op = CEXP_OP_OPAR; 1148 if (!cexp_push_op(new_op)) 1149 return (-1); 1150 break; 1151 1152 case ')': 1153 new_op = CEXP_OP_CPAR; 1154 1155 /* Evaluate the operator stack until reach '(' */ 1156 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack) && 1157 (STACK_TOP(lms.lms_cexp_op_stack) != CEXP_OP_OPAR)) 1158 if (!cexp_eval_op()) 1159 goto semantic_error; 1160 1161 /* 1162 * If the top of operator stack is not an open paren, 1163 * when we have an error. In this case, the operator 1164 * stack will be empty due to the loop above. 1165 */ 1166 if (STACK_IS_EMPTY(lms.lms_cexp_op_stack)) 1167 goto unbalpar_error; 1168 lms.lms_cexp_op_stack.stk_n--; /* Pop OPAR */ 1169 break; 1170 1171 default: 1172 /* Ensure there's room to push another operand */ 1173 if (STACK_RESERVE(lms.lms_cexp_val_stack, 1174 CEXP_VAL_STACK_INIT) == 0) 1175 return (0); 1176 new_op = CEXP_OP_NONE; 1177 1178 /* 1179 * Operands cannot be numbers. However, we accept two 1180 * special cases: '0' means false, and '1' is true. 1181 * This is done to support the common C idiom of 1182 * '#if 1' and '#if 0' to conditionalize code under 1183 * development. 
1184 */ 1185 if ((*str == '0') || (*str == '1')) { 1186 STACK_PUSH(lms.lms_cexp_val_stack) = 1187 (*str == '1'); 1188 break; 1189 } 1190 1191 /* Look up the identifier */ 1192 ident = mf->mf_next = str; 1193 if (!cexp_ident_validate(mf, &len, NULL)) 1194 return (-1); 1195 str += len - 1; /* loop will advance past final ch */ 1196 null_patch_set(&ident[len], &np); 1197 STACK_PUSH(lms.lms_cexp_val_stack) = 1198 cexp_ident_test(ident); 1199 null_patch_undo(&np); 1200 1201 break; 1202 } 1203 } 1204 1205 /* Evaluate the operator stack until empty */ 1206 while (!STACK_IS_EMPTY(lms.lms_cexp_op_stack)) { 1207 if (STACK_TOP(lms.lms_cexp_op_stack) == CEXP_OP_OPAR) 1208 goto unbalpar_error; 1209 1210 if (!cexp_eval_op()) 1211 goto semantic_error; 1212 } 1213 1214 /* There should be exactly one value left */ 1215 if (lms.lms_cexp_val_stack.stk_n != 1) 1216 goto semantic_error; 1217 1218 /* Final value is the result */ 1219 return (lms.lms_cexp_val_stack.stk_s[0]); 1220 1221 /* Errors issued more than once are handled below, accessed via goto */ 1222 1223 token_error: /* unexpected characters in input stream */ 1224 mf_fatal(mf, MSG_INTL(MSG_MAP_CEXP_TOKERR), str); 1225 return (-1); 1226 1227 semantic_error: /* valid tokens, but in invalid arrangement */ 1228 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_SEMERR)); 1229 return (-1); 1230 1231 unbalpar_error: /* Extra or missing parenthesis */ 1232 mf_fatal0(mf, MSG_INTL(MSG_MAP_CEXP_UNBALPAR)); 1233 return (-1); 1234 } 1235 1236 /* 1237 * Process a mapfile control directive. These directives start with 1238 * the dollar character, and are used to manage details of the mapfile 1239 * itself, such as version and conditional input. 1240 * 1241 * entry: 1242 * mf - Mapfile descriptor 1243 * 1244 * exit: 1245 * Returns TRUE (1) for success, and FALSE (0) on error. In the 1246 * error case, a descriptive error is issued. 
1247 */ 1248 static Boolean 1249 cdir_process(Mapfile *mf) 1250 { 1251 typedef enum { /* Directive types */ 1252 CDIR_T_UNKNOWN = 0, /* Unrecognized control directive */ 1253 CDIR_T_ADD, /* $add */ 1254 CDIR_T_CLEAR, /* $clear */ 1255 CDIR_T_ERROR, /* $error */ 1256 CDIR_T_VERSION, /* $mapfile_version */ 1257 CDIR_T_IF, /* $if */ 1258 CDIR_T_ELIF, /* $elif */ 1259 CDIR_T_ELSE, /* $else */ 1260 CDIR_T_ENDIF, /* $endif */ 1261 } cdir_t; 1262 1263 typedef enum { /* Types of arguments accepted by directives */ 1264 ARG_T_NONE, /* Directive takes no arguments */ 1265 ARG_T_EXPR, /* Directive takes a conditional expression */ 1266 ARG_T_ID, /* Conditional expression identifier */ 1267 ARG_T_STR, /* Non-empty string */ 1268 ARG_T_IGN /* Ignore the argument */ 1269 } cdir_arg_t; 1270 1271 typedef struct { 1272 const char *md_name; /* Directive name */ 1273 size_t md_size; /* strlen(md_name) */ 1274 cdir_arg_t md_arg; /* Type of arguments */ 1275 cdir_t md_op; /* CDIR_T_ code */ 1276 } cdir_match_t; 1277 1278 /* Control Directives: The most likely items are listed first */ 1279 static cdir_match_t match_data[] = { 1280 { MSG_ORIG(MSG_STR_CDIR_IF), MSG_STR_CDIR_IF_SIZE, 1281 ARG_T_EXPR, CDIR_T_IF }, 1282 { MSG_ORIG(MSG_STR_CDIR_ENDIF), MSG_STR_CDIR_ENDIF_SIZE, 1283 ARG_T_NONE, CDIR_T_ENDIF }, 1284 { MSG_ORIG(MSG_STR_CDIR_ELSE), MSG_STR_CDIR_ELSE_SIZE, 1285 ARG_T_NONE, CDIR_T_ELSE }, 1286 { MSG_ORIG(MSG_STR_CDIR_ELIF), MSG_STR_CDIR_ELIF_SIZE, 1287 ARG_T_EXPR, CDIR_T_ELIF }, 1288 { MSG_ORIG(MSG_STR_CDIR_ERROR), MSG_STR_CDIR_ERROR_SIZE, 1289 ARG_T_STR, CDIR_T_ERROR }, 1290 { MSG_ORIG(MSG_STR_CDIR_ADD), MSG_STR_CDIR_ADD_SIZE, 1291 ARG_T_ID, CDIR_T_ADD }, 1292 { MSG_ORIG(MSG_STR_CDIR_CLEAR), MSG_STR_CDIR_CLEAR_SIZE, 1293 ARG_T_ID, CDIR_T_CLEAR }, 1294 { MSG_ORIG(MSG_STR_CDIR_MFVER), MSG_STR_CDIR_MFVER_SIZE, 1295 ARG_T_IGN, CDIR_T_VERSION }, 1296 1297 { NULL, 0, 1298 ARG_T_IGN, CDIR_T_UNKNOWN } 1299 }; 1300 1301 cdir_match_t *mdptr; 1302 char *tail; 1303 int expr_eval; /* Result 
of evaluating ARG_T_EXPR */ 1304 Mapfile arg_mf; 1305 cdir_level_t *level; 1306 int pass, parent_pass; /* Currently accepting input */ 1307 1308 restart: 1309 /* Is the immediate context passing input? */ 1310 pass = STACK_IS_EMPTY(lms.lms_cdir_stack) || 1311 STACK_TOP(lms.lms_cdir_stack).cdl_pass; 1312 1313 /* Is the surrounding (parent) context passing input? */ 1314 parent_pass = (lms.lms_cdir_stack.stk_n <= 1) || 1315 lms.lms_cdir_stack.stk_s[lms.lms_cdir_stack.stk_n - 2].cdl_pass; 1316 1317 1318 for (mdptr = match_data; mdptr->md_name; mdptr++) { 1319 /* Prefix must match, or we move on */ 1320 if (strncmp(mf->mf_next, mdptr->md_name, 1321 mdptr->md_size) != 0) 1322 continue; 1323 tail = mf->mf_next + mdptr->md_size; 1324 1325 /* 1326 * If there isn't whitespace, or a NULL terminator following 1327 * the prefix, then even though our prefix matched, the actual 1328 * token is longer, and we don't have a match. 1329 */ 1330 if (!isspace(*tail) && (*tail != '\0')) 1331 continue; 1332 1333 /* We have matched a valid control directive */ 1334 break; 1335 } 1336 1337 /* Advance input to end of the current line */ 1338 advance_to_eol(&mf->mf_next); 1339 1340 /* 1341 * Set up a temporary mapfile descriptor to reference the 1342 * argument string. The benefit of this second block, is that 1343 * we can advance the real one to the next line now, which allows 1344 * us to return at any time knowing that the input has been moved 1345 * to the proper spot. This simplifies the error cases. 1346 * 1347 * If we had a match, tail points at the start of the string. 1348 * Otherwise, we want to point at the end of the line. 1349 */ 1350 arg_mf = *mf; 1351 if (mdptr->md_name == NULL) 1352 arg_mf.mf_text = arg_mf.mf_next; 1353 else 1354 arg_mf.mf_text = arg_mf.mf_next = tail; 1355 1356 /* 1357 * Null terminate the arguments, and advance the main mapfile 1358 * state block to the next line. 
1359 */ 1360 if (*mf->mf_next == '\n') { 1361 *mf->mf_next++ = '\0'; 1362 mf->mf_lineno++; 1363 } 1364 1365 /* Skip leading whitespace to arguments */ 1366 while (isspace(*arg_mf.mf_next)) 1367 arg_mf.mf_next++; 1368 1369 /* Strip off any comment present on the line */ 1370 for (tail = arg_mf.mf_next; *tail; tail++) 1371 if (*tail == '#') { 1372 *tail = '\0'; 1373 break; 1374 } 1375 1376 /* 1377 * Process the arguments as necessary depending on their type. 1378 * If this control directive is nested inside a surrounding context 1379 * that is not currently passing text, then we skip the argument 1380 * evaluation. This follows the behavior of the C preprocessor, 1381 * which only examines enough to detect the operation within 1382 * a disabled section, without issuing errors about the arguments. 1383 */ 1384 if (pass || (parent_pass && (mdptr->md_op == CDIR_T_ELIF))) { 1385 switch (mdptr->md_arg) { 1386 case ARG_T_NONE: 1387 if (*arg_mf.mf_next == '\0') 1388 break; 1389 /* Args are present, but not wanted */ 1390 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REQNOARG), 1391 mdptr->md_name); 1392 return (FALSE); 1393 1394 case ARG_T_EXPR: 1395 /* Ensure that arguments are present */ 1396 if (*arg_mf.mf_next == '\0') 1397 goto error_reqarg; 1398 expr_eval = cexp_eval_expr(&arg_mf); 1399 if (expr_eval == -1) 1400 return (FALSE); 1401 break; 1402 1403 case ARG_T_ID: 1404 /* Ensure that arguments are present */ 1405 if (*arg_mf.mf_next == '\0') 1406 goto error_reqarg; 1407 if (!cexp_ident_validate(&arg_mf, NULL, 1408 mdptr->md_name)) 1409 return (FALSE); 1410 break; 1411 1412 case ARG_T_STR: 1413 /* Ensure that arguments are present */ 1414 if (*arg_mf.mf_next == '\0') 1415 goto error_reqarg; 1416 /* Remove trailing whitespace */ 1417 tail = arg_mf.mf_next + strlen(arg_mf.mf_next); 1418 while ((tail > arg_mf.mf_next) && 1419 isspace(*(tail -1))) 1420 tail--; 1421 *tail = '\0'; 1422 break; 1423 } 1424 } 1425 1426 /* 1427 * Carry out the specified control directive: 1428 */ 1429 
if (!STACK_IS_EMPTY(lms.lms_cdir_stack)) 1430 level = &STACK_TOP(lms.lms_cdir_stack); 1431 1432 switch (mdptr->md_op) { 1433 case CDIR_T_UNKNOWN: /* Unrecognized control directive */ 1434 if (!pass) 1435 break; 1436 mf_fatal0(&arg_mf, MSG_INTL(MSG_MAP_CDIR_BAD)); 1437 return (FALSE); 1438 1439 case CDIR_T_ADD: 1440 if (pass && !cexp_ident_add(&arg_mf, arg_mf.mf_next)) 1441 return (FALSE); 1442 break; 1443 1444 case CDIR_T_CLEAR: 1445 if (pass) 1446 cexp_ident_clear(&arg_mf, arg_mf.mf_next); 1447 break; 1448 1449 case CDIR_T_ERROR: 1450 if (!pass) 1451 break; 1452 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ERROR), 1453 arg_mf.mf_next); 1454 return (FALSE); 1455 1456 case CDIR_T_VERSION: 1457 /* 1458 * A $mapfile_version control directive can only appear 1459 * as the first directive in a mapfile, and is used to 1460 * determine the syntax for the rest of the file. It's 1461 * too late to be using it here. 1462 */ 1463 if (!pass) 1464 break; 1465 mf_fatal0(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REPVER)); 1466 return (FALSE); 1467 1468 case CDIR_T_IF: 1469 /* Push a new level on the conditional input stack */ 1470 if (STACK_RESERVE(lms.lms_cdir_stack, CDIR_STACK_INIT) == 0) 1471 return (FALSE); 1472 level = &lms.lms_cdir_stack.stk_s[lms.lms_cdir_stack.stk_n++]; 1473 level->cdl_if_lineno = arg_mf.mf_lineno; 1474 level->cdl_else_lineno = 0; 1475 1476 /* 1477 * If previous level is not passing, this level is disabled. 1478 * Otherwise, the expression value determines what happens. 
1479 */ 1480 if (pass) { 1481 level->cdl_done = level->cdl_pass = expr_eval; 1482 } else { 1483 level->cdl_done = 1; 1484 level->cdl_pass = 0; 1485 } 1486 break; 1487 1488 case CDIR_T_ELIF: 1489 /* $elif requires an open $if construct */ 1490 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) { 1491 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF), 1492 MSG_ORIG(MSG_STR_CDIR_ELIF)); 1493 return (FALSE); 1494 } 1495 1496 /* $elif cannot follow $else */ 1497 if (level->cdl_else_lineno > 0) { 1498 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ELSE), 1499 MSG_ORIG(MSG_STR_CDIR_ELIF), 1500 EC_LINENO(level->cdl_else_lineno)); 1501 return (FALSE); 1502 } 1503 1504 /* 1505 * Accept text from $elif if the level isn't already 1506 * done and the expression evaluates to true. 1507 */ 1508 level->cdl_pass = !level->cdl_done && expr_eval; 1509 if (level->cdl_pass) 1510 level->cdl_done = 1; 1511 break; 1512 1513 case CDIR_T_ELSE: 1514 /* $else requires an open $if construct */ 1515 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) { 1516 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF), 1517 MSG_ORIG(MSG_STR_CDIR_ELSE)); 1518 return (FALSE); 1519 } 1520 1521 /* There can only be one $else in the chain */ 1522 if (level->cdl_else_lineno > 0) { 1523 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_ELSE), 1524 MSG_ORIG(MSG_STR_CDIR_ELSE), 1525 EC_LINENO(level->cdl_else_lineno)); 1526 return (FALSE); 1527 } 1528 level->cdl_else_lineno = arg_mf.mf_lineno; 1529 1530 /* Accept text from $else if the level isn't already done */ 1531 level->cdl_pass = !level->cdl_done; 1532 level->cdl_done = 1; 1533 break; 1534 1535 case CDIR_T_ENDIF: 1536 /* $endif requires an open $if construct */ 1537 if (STACK_IS_EMPTY(lms.lms_cdir_stack)) { 1538 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_NOIF), 1539 MSG_ORIG(MSG_STR_CDIR_ENDIF)); 1540 return (FALSE); 1541 } 1542 if (--lms.lms_cdir_stack.stk_n > 0) 1543 level = &STACK_TOP(lms.lms_cdir_stack); 1544 break; 1545 1546 default: 1547 return (FALSE); 1548 } 1549 1550 /* Evaluating the control 
directive above can change pass status */ 1551 expr_eval = STACK_IS_EMPTY(lms.lms_cdir_stack) || 1552 STACK_TOP(lms.lms_cdir_stack).cdl_pass; 1553 if (expr_eval != pass) { 1554 pass = expr_eval; 1555 DBG_CALL(Dbg_map_pass(arg_mf.mf_ofl->ofl_lml, pass, 1556 arg_mf.mf_name, arg_mf.mf_lineno, mdptr->md_name)); 1557 } 1558 1559 /* 1560 * At this point, we have processed a control directive, 1561 * updated our conditional state stack, and the input is 1562 * positioned at the start of the line following the directive. 1563 * If the current level is accepting input, then give control 1564 * back to ld_map_gettoken() to resume its normal operation. 1565 */ 1566 if (pass) 1567 return (TRUE); 1568 1569 /* 1570 * The current level is not accepting input. Only another 1571 * control directive can change this, so read and discard input 1572 * until we encounter one of the following: 1573 * 1574 * EOF: Return and let ld_map_gettoken() report it 1575 * Control Directive: Restart this function / evaluate new directive 1576 */ 1577 while (*mf->mf_next != '\0') { 1578 /* Skip leading whitespace */ 1579 while (isspace_nonl(*mf->mf_next)) 1580 mf->mf_next++; 1581 1582 /* 1583 * Control directives start with a '$'. If we hit 1584 * one, restart the function at this point 1585 */ 1586 if (*mf->mf_next == '$') 1587 goto restart; 1588 1589 /* Not a control directive, so advance input to next line */ 1590 advance_to_eol(&mf->mf_next); 1591 if (*mf->mf_next == '\n') { 1592 mf->mf_lineno++; 1593 mf->mf_next++; 1594 } 1595 } 1596 1597 assert(*mf->mf_next == '\0'); 1598 return (TRUE); 1599 1600 /* 1601 * Control directives that require an argument that is not present 1602 * jump here to report the error and exit. 1603 */ 1604 error_reqarg: 1605 mf_fatal(&arg_mf, MSG_INTL(MSG_MAP_CDIR_REQARG), mdptr->md_name); 1606 return (FALSE); 1607 1608 } 1609 1610 #ifndef _ELF64 1611 /* 1612 * Convert a string to lowercase. 
1613 */ 1614 void 1615 ld_map_lowercase(char *str) 1616 { 1617 while (*str = tolower(*str)) 1618 str++; 1619 } 1620 #endif 1621 1622 /* 1623 * Wrappper on strtoul()/strtoull(), adapted to return an Xword. 1624 * 1625 * entry: 1626 * str - Pointer to string to be converted. 1627 * endptr - As documented for strtoul(3C). Either NULL, or 1628 * address of pointer to receive the address of the first 1629 * unused character in str (called "final" in strtoul(3C)). 1630 * ret_value - Address of Xword variable to receive result. 1631 * 1632 * exit: 1633 * On success, *ret_value receives the result, *endptr is updated if 1634 * endptr is non-NULL, and STRTOXWORD_OK is returned. 1635 * On failure, STRTOXWORD_TOBIG is returned if an otherwise valid 1636 * value was too large, and STRTOXWORD_BAD is returned if the string 1637 * is malformed. 1638 */ 1639 ld_map_strtoxword_t 1640 ld_map_strtoxword(const char *restrict str, char **restrict endptr, 1641 Xword *ret_value) 1642 { 1643 #if defined(_ELF64) /* _ELF64 */ 1644 #define FUNC strtoull /* Function to use */ 1645 #define FUNC_MAX ULLONG_MAX /* Largest value returned by FUNC */ 1646 #define XWORD_MAX ULLONG_MAX /* Largest Xword value */ 1647 uint64_t value; /* Variable of FUNC return type */ 1648 #else /* _ELF32 */ 1649 #define FUNC strtoul 1650 #define FUNC_MAX ULONG_MAX 1651 #define XWORD_MAX UINT_MAX 1652 ulong_t value; 1653 #endif 1654 1655 char *endptr_local; /* Used if endptr is NULL */ 1656 1657 if (endptr == NULL) 1658 endptr = &endptr_local; 1659 1660 errno = 0; 1661 value = FUNC(str, endptr, 0); 1662 if ((errno != 0) || (str == *endptr)) { 1663 if (value == FUNC_MAX) 1664 return (STRTOXWORD_TOOBIG); 1665 else 1666 return (STRTOXWORD_BAD); 1667 } 1668 1669 /* 1670 * If this is a 64-bit linker building an ELFCLASS32 object, 1671 * the FUNC return type is a 64-bit value, while an Xword is 1672 * 32-bit. It is possible for FUNC to be able to convert a value 1673 * too large for our return type. 
1674 */ 1675 #if FUNC_MAX != XWORD_MAX 1676 if (value > XWORD_MAX) 1677 return (STRTOXWORD_TOOBIG); 1678 #endif 1679 1680 *ret_value = value; 1681 return (STRTOXWORD_OK); 1682 1683 #undef FUNC 1684 #undef FUNC_MAX 1685 #undef XWORD_MAC 1686 } 1687 1688 /* 1689 * Convert the unsigned integer value at the current mapfile input 1690 * into binary form. All numeric values in mapfiles are treated as 1691 * unsigned integers of the appropriate width for an address on the 1692 * given target. Values can be decimal, hex, or octal. 1693 * 1694 * entry: 1695 * str - String to process. 1696 * value - Address of variable to receive resulting value. 1697 * notail - If TRUE, an error is issued if non-whitespace 1698 * characters other than '#' (comment) are found following 1699 * the numeric value before the end of line. 1700 * 1701 * exit: 1702 * On success: 1703 * - *str is advanced to the next character following the value 1704 * - *value receives the value 1705 * - Returns TRUE (1). 1706 * On failure, returns FALSE (0). 
1707 */ 1708 static Boolean 1709 ld_map_getint(Mapfile *mf, ld_map_tkval_t *value, Boolean notail) 1710 { 1711 ld_map_strtoxword_t s2xw_ret; 1712 ld_map_npatch_t np; 1713 char *endptr; 1714 char *startptr = mf->mf_next; 1715 char *errstr = mf->mf_next; 1716 1717 value->tkv_int.tkvi_str = mf->mf_next; 1718 s2xw_ret = ld_map_strtoxword(mf->mf_next, &endptr, 1719 &value->tkv_int.tkvi_value); 1720 if (s2xw_ret != STRTOXWORD_OK) { 1721 null_patch_eol(mf->mf_next, &np); 1722 if (s2xw_ret == STRTOXWORD_TOOBIG) 1723 mf_fatal(mf, MSG_INTL(MSG_MAP_VALUELIMIT), errstr); 1724 else 1725 mf_fatal(mf, MSG_INTL(MSG_MAP_MALVALUE), errstr); 1726 null_patch_undo(&np); 1727 return (FALSE); 1728 } 1729 1730 /* Advance position to item following value, skipping whitespace */ 1731 value->tkv_int.tkvi_cnt = endptr - startptr; 1732 mf->mf_next = endptr; 1733 1734 while (isspace_nonl(*mf->mf_next)) 1735 mf->mf_next++; 1736 1737 /* If requested, ensure there's nothing left */ 1738 if (notail && (*mf->mf_next != '\n') && (*mf->mf_next != '#') && 1739 (*mf->mf_next != '\0')) { 1740 null_patch_eol(mf->mf_next, &np); 1741 mf_fatal(mf, MSG_INTL(MSG_MAP_BADVALUETAIL), errstr); 1742 null_patch_undo(&np); 1743 return (FALSE); 1744 } 1745 1746 return (TRUE); 1747 } 1748 1749 /* 1750 * Convert a an unquoted identifier into a TK_STRING token, using the 1751 * rules for syntax version in use. Used exclusively by ld_map_gettoken(). 1752 * 1753 * entry: 1754 * mf - Mapfile descriptor, positioned to the first character of 1755 * the string. 1756 * flags - Bitmask of options to control ld_map_gettoken()s behavior 1757 * tkv- Address of pointer to variable to receive token value. 1758 * 1759 * exit: 1760 * On success, mf is advanced past the token, tkv is updated with 1761 * the string, and TK_STRING is returned. On error, TK_ERROR is returned. 
1762 */ 1763 inline static Token 1764 gettoken_ident(Mapfile *mf, int flags, ld_map_tkval_t *tkv) 1765 { 1766 char *end; 1767 Token tok; 1768 ld_map_npatch_t np; 1769 1770 tkv->tkv_str = mf->mf_next; 1771 if ((end = ident_delimit(mf)) == NULL) 1772 return (TK_ERROR); 1773 mf->mf_next = end; 1774 1775 /* 1776 * One advantage of reading the entire mapfile into memory is that 1777 * we can access the strings within it without having to allocate 1778 * more memory or make copies. In order to do that, we need to NULL 1779 * terminate this identifier. That is going to overwrite the 1780 * following character. The problem this presents is that the next 1781 * character may well be the first character of a subsequent token. 1782 * The solution to this is: 1783 * 1784 * 1) Disallow the case where the next character is able to 1785 * start a string. This is not legal mapfile syntax anyway, 1786 * so catching it here simplifies matters. 1787 * 2) Copy the character into the special mf->mf_next_ch 1788 * 3) The next call to ld_map_gettoken() checks mf->mf_next_ch, 1789 * and if it is non-0, uses it instead of dereferencing the 1790 * mf_next pointer. 1791 */ 1792 tok = (*mf->mf_next & 0x80) ? 
1793 TK_OP_ILLCHR : mf->mf_tokdisp[(unsigned)*mf->mf_next]; 1794 switch (tok) { 1795 case TK_OP_BADCHR: 1796 null_patch_eol(mf->mf_next, &np); 1797 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), mf->mf_next); 1798 null_patch_undo(&np); 1799 return (TK_ERROR); 1800 1801 case TK_OP_SIMQUOTE: 1802 case TK_OP_CQUOTE: 1803 case TK_OP_CDIR: 1804 case TK_OP_NUM: 1805 case TK_OP_ID: 1806 null_patch_eol(mf->mf_next, &np); 1807 mf_fatal(mf, MSG_INTL(MSG_MAP_WSNEEDED), mf->mf_next); 1808 null_patch_undo(&np); 1809 return (TK_ERROR); 1810 } 1811 1812 /* Null terminate, saving the replaced character */ 1813 mf->mf_next_ch = *mf->mf_next; 1814 *mf->mf_next = '\0'; 1815 1816 if (flags & TK_F_STRLC) 1817 ld_map_lowercase(tkv->tkv_str); 1818 return (TK_STRING); 1819 } 1820 1821 /* 1822 * Convert a quoted string into a TK_STRING token, using simple 1823 * quoting rules: 1824 * - Start and end quotes must be present and match 1825 * - There are no special characters or escape sequences. 1826 * This function is used exclusively by ld_map_gettoken(). 1827 * 1828 * entry: 1829 * mf - Mapfile descriptor, positioned to the opening quote character. 1830 * flags - Bitmask of options to control ld_map_gettoken()s behavior 1831 * tkv- Address of pointer to variable to receive token value. 1832 * 1833 * exit: 1834 * On success, mf is advanced past the token, tkv is updated with 1835 * the string, and TK_STRING is returned. On error, TK_ERROR is returned. 1836 */ 1837 inline static Token 1838 gettoken_simquote_str(Mapfile *mf, int flags, ld_map_tkval_t *tkv) 1839 { 1840 char *str, *end; 1841 char quote; 1842 1843 str = mf->mf_next++; 1844 quote = *str; 1845 end = mf->mf_next; 1846 while ((*end != '\0') && (*end != '\n') && (*end != quote)) 1847 end++; 1848 if (*end != quote) { 1849 ld_map_npatch_t np; 1850 1851 null_patch_eol(end, &np); 1852 mf_fatal(mf, MSG_INTL(MSG_MAP_NOTERM), str); 1853 null_patch_undo(&np); 1854 return (TK_ERROR); 1855 } 1856 1857 /* 1858 * end is pointing at the closing quote. 
We can turn that into NULL 1859 * termination for the string without needing to restore it later. 1860 */ 1861 *end = '\0'; 1862 mf->mf_next = end + 1; 1863 tkv->tkv_str = str + 1; /* Skip opening quote */ 1864 if (flags & TK_F_STRLC) 1865 ld_map_lowercase(tkv->tkv_str); 1866 return (TK_STRING); 1867 } 1868 1869 /* 1870 * Convert a quoted string into a TK_STRING token, using C string literal 1871 * quoting rules: 1872 * - Start and end quotes must be present and match 1873 * - Backslash is an escape, used to introduce special characters 1874 * This function is used exclusively by ld_map_gettoken(). 1875 * 1876 * entry: 1877 * mf - Mapfile descriptor, positioned to the opening quote character. 1878 * flags - Bitmask of options to control ld_map_gettoken()s behavior 1879 * tkv- Address of pointer to variable to receive token value. 1880 * 1881 * exit: 1882 * On success, mf is advanced past the token, tkv is updated with 1883 * the string, and TK_STRING is returned. On error, TK_ERROR is returned. 1884 */ 1885 inline static Token 1886 gettoken_cquote_str(Mapfile *mf, int flags, ld_map_tkval_t *tkv) 1887 { 1888 char *str, *cur, *end; 1889 char quote; 1890 int c; 1891 1892 /* 1893 * This function goes through the quoted string and copies 1894 * it on top of itself, replacing escape sequences with the 1895 * characters they denote. There is always enough room for this, 1896 * because escapes are multi-character sequences that are converted 1897 * to single character results. 
1898 */ 1899 str = mf->mf_next++; 1900 quote = *str; 1901 cur = end = mf->mf_next; 1902 for (c = *end++; (c != '\0') && (c != '\n') && (c != quote); 1903 c = *end++) { 1904 if (c == '\\') { 1905 c = conv_translate_c_esc(&end); 1906 if (c == -1) { 1907 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCESC), *end); 1908 return (TK_ERROR); 1909 } 1910 } 1911 *cur++ = c; 1912 } 1913 *cur = '\0'; /* terminate the result */ 1914 if (c != quote) { 1915 ld_map_npatch_t np; 1916 1917 null_patch_eol(end, &np); 1918 mf_fatal(mf, MSG_INTL(MSG_MAP_NOTERM), str); 1919 null_patch_undo(&np); 1920 return (TK_ERROR); 1921 } 1922 1923 /* end is pointing one character past the closing quote */ 1924 mf->mf_next = end; 1925 tkv->tkv_str = str + 1; /* Skip opening quote */ 1926 if (flags & TK_F_STRLC) 1927 ld_map_lowercase(tkv->tkv_str); 1928 return (TK_STRING); 1929 } 1930 1931 /* 1932 * Peek ahead at the text token. 1933 * 1934 * entry: 1935 * mf - Mapfile descriptor 1936 * 1937 * exit: 1938 * Returns one of the TK_* values, including the TK_OP values (that is, 1939 * tokens are not processed into their necessarily final form). 1940 */ 1941 Token 1942 ld_map_peektoken(Mapfile *mf) 1943 { 1944 int ch; 1945 1946 if (mf->mf_next_ch == 0) 1947 ch = *mf->mf_next; 1948 else 1949 ch = mf->mf_next_ch; 1950 1951 return ((ch & 0x80) ? TK_OP_ILLCHR : mf->mf_tokdisp[ch]); 1952 } 1953 1954 /* 1955 * Get a token from the mapfile. 1956 * 1957 * entry: 1958 * mf - Mapfile descriptor 1959 * flags - Bitmask of options to control ld_map_gettoken()s behavior 1960 * tkv- Address of pointer to variable to receive token value. 1961 * 1962 * exit: 1963 * Returns one of the TK_* values, to report the result. If the resulting 1964 * token has a value (TK_STRING / TK_INT), and tkv is non-NULL, tkv 1965 * is filled in with the resulting value. 
1966 */ 1967 Token 1968 ld_map_gettoken(Mapfile *mf, int flags, ld_map_tkval_t *tkv) 1969 { 1970 int cdir_allow, ch; 1971 Token tok; 1972 ld_map_npatch_t np; 1973 1974 /* 1975 * Mapfile control directives all start with a '$' character. However, 1976 * they are only valid when they are the first thing on a line. That 1977 * happens on the first call to ld_map_gettoken() for a new a new 1978 * mapfile, as tracked with lms.lms_cdir_valid, and immediately 1979 * following each newline seen in the file. 1980 */ 1981 cdir_allow = lms.lms_cdir_valid; 1982 lms.lms_cdir_valid = 0; 1983 1984 /* Cycle through the characters looking for tokens. */ 1985 for (;;) { 1986 /* 1987 * Process the next character. This is normally *mf->mf_next, 1988 * but if mf->mf_next_ch is non-0, then it contains the 1989 * character, and *mf->mf_next contains a NULL termination 1990 * from the TK_STRING token returned on the previous call. 1991 * 1992 * gettoken_ident() ensures that this is never done to 1993 * a character that starts a string. 1994 */ 1995 if (mf->mf_next_ch == 0) { 1996 ch = *mf->mf_next; 1997 } else { 1998 ch = mf->mf_next_ch; 1999 mf->mf_next_ch = 0; /* Reset */ 2000 } 2001 2002 /* Map the character to a dispatch action */ 2003 tok = (ch & 0x80) ? TK_OP_ILLCHR : mf->mf_tokdisp[ch]; 2004 2005 /* 2006 * Items that require processing are identified as OP tokens. 2007 * We process them, and return a result non-OP token. 2008 * 2009 * Non-OP tokens are single character tokens, and we return 2010 * them immediately. 
2011 */ 2012 switch (tok) { 2013 case TK_OP_EOF: 2014 /* If EOFOK is set, quietly report it as TK_EOF */ 2015 if ((flags & TK_F_EOFOK) != 0) 2016 return (TK_EOF); 2017 2018 /* Treat it as a standard error */ 2019 mf_fatal0(mf, MSG_INTL(MSG_MAP_PREMEOF)); 2020 return (TK_ERROR); 2021 2022 case TK_OP_ILLCHR: 2023 mf_fatal(mf, MSG_INTL(MSG_MAP_ILLCHAR), ch); 2024 mf->mf_next++; 2025 return (TK_ERROR); 2026 2027 case TK_OP_BADCHR: 2028 tk_op_badchr: 2029 null_patch_eol(mf->mf_next, &np); 2030 mf_fatal(mf, MSG_INTL(MSG_MAP_BADCHAR), mf->mf_next); 2031 null_patch_undo(&np); 2032 mf->mf_next++; 2033 return (TK_ERROR); 2034 2035 case TK_OP_WS: /* White space */ 2036 mf->mf_next++; 2037 break; 2038 2039 case TK_OP_NL: /* White space too, but bump line number. */ 2040 mf->mf_next++; 2041 mf->mf_lineno++; 2042 cdir_allow = 1; 2043 break; 2044 2045 case TK_OP_SIMQUOTE: 2046 if (flags & TK_F_KEYWORD) 2047 goto tk_op_badkwquote; 2048 return (gettoken_simquote_str(mf, flags, tkv)); 2049 2050 case TK_OP_CQUOTE: 2051 if (flags & TK_F_KEYWORD) { 2052 tk_op_badkwquote: 2053 null_patch_eol(mf->mf_next, &np); 2054 mf_fatal(mf, MSG_INTL(MSG_MAP_BADKWQUOTE), 2055 mf->mf_next); 2056 null_patch_undo(&np); 2057 mf->mf_next++; 2058 return (TK_ERROR); 2059 } 2060 return (gettoken_cquote_str(mf, flags, tkv)); 2061 2062 case TK_OP_CMT: 2063 advance_to_eol(&mf->mf_next); 2064 break; 2065 2066 case TK_OP_CDIR: 2067 /* 2068 * Control directives are only valid at the start 2069 * of a line. 2070 */ 2071 if (!cdir_allow) { 2072 null_patch_eol(mf->mf_next, &np); 2073 mf_fatal(mf, MSG_INTL(MSG_MAP_CDIR_NOTBOL), 2074 mf->mf_next); 2075 null_patch_undo(&np); 2076 mf->mf_next++; 2077 return (TK_ERROR); 2078 } 2079 if (!cdir_process(mf)) 2080 return (TK_ERROR); 2081 break; 2082 2083 case TK_OP_NUM: /* Decimal, hex(0x...), or octal (0...) 
value */ 2084 if (!ld_map_getint(mf, tkv, FALSE)) 2085 return (TK_ERROR); 2086 return (TK_INT); 2087 2088 case TK_OP_ID: /* Unquoted identifier */ 2089 return (gettoken_ident(mf, flags, tkv)); 2090 2091 case TK_OP_CEQUAL: /* += or -= */ 2092 if (*(mf->mf_next + 1) != '=') 2093 goto tk_op_badchr; 2094 tok = (ch == '+') ? TK_PLUSEQ : TK_MINUSEQ; 2095 mf->mf_next += 2; 2096 return (tok); 2097 2098 default: /* Non-OP token */ 2099 mf->mf_next++; 2100 return (tok); 2101 } 2102 } 2103 } 2104 2105 /* 2106 * Given a token and value returned by ld_map_gettoken(), return a string 2107 * representation of it suitable for use in an error message. 2108 * 2109 * entry: 2110 * tok - Token code. Must not be an OP-token 2111 * tkv - Token value 2112 */ 2113 const char * 2114 ld_map_tokenstr(Token tok, ld_map_tkval_t *tkv, Conv_inv_buf_t *inv_buf) 2115 { 2116 size_t cnt; 2117 2118 switch (tok) { 2119 case TK_ERROR: 2120 return (MSG_ORIG(MSG_STR_ERROR)); 2121 case TK_EOF: 2122 return (MSG_ORIG(MSG_STR_EOF)); 2123 case TK_STRING: 2124 return (tkv->tkv_str); 2125 case TK_COLON: 2126 return (MSG_ORIG(MSG_QSTR_COLON)); 2127 case TK_SEMICOLON: 2128 return (MSG_ORIG(MSG_QSTR_SEMICOLON)); 2129 case TK_EQUAL: 2130 return (MSG_ORIG(MSG_QSTR_EQUAL)); 2131 case TK_PLUSEQ: 2132 return (MSG_ORIG(MSG_QSTR_PLUSEQ)); 2133 case TK_MINUSEQ: 2134 return (MSG_ORIG(MSG_QSTR_MINUSEQ)); 2135 case TK_ATSIGN: 2136 return (MSG_ORIG(MSG_QSTR_ATSIGN)); 2137 case TK_DASH: 2138 return (MSG_ORIG(MSG_QSTR_DASH)); 2139 case TK_LEFTBKT: 2140 return (MSG_ORIG(MSG_QSTR_LEFTBKT)); 2141 case TK_RIGHTBKT: 2142 return (MSG_ORIG(MSG_QSTR_RIGHTBKT)); 2143 case TK_LEFTSQR: 2144 return (MSG_ORIG(MSG_QSTR_LEFTSQR)); 2145 case TK_RIGHTSQR: 2146 return (MSG_ORIG(MSG_QSTR_RIGHTSQR)); 2147 case TK_PIPE: 2148 return (MSG_ORIG(MSG_QSTR_PIPE)); 2149 case TK_INT: 2150 cnt = tkv->tkv_int.tkvi_cnt; 2151 if (cnt >= sizeof (inv_buf->buf)) 2152 cnt = sizeof (inv_buf->buf) - 1; 2153 (void) memcpy(inv_buf->buf, tkv->tkv_int.tkvi_str, cnt); 
2154 inv_buf->buf[cnt] = '\0'; 2155 return (inv_buf->buf); 2156 case TK_STAR: 2157 return (MSG_ORIG(MSG_QSTR_STAR)); 2158 case TK_BANG: 2159 return (MSG_ORIG(MSG_QSTR_BANG)); 2160 default: 2161 assert(0); 2162 break; 2163 } 2164 2165 /*NOTREACHED*/ 2166 return (MSG_INTL(MSG_MAP_INTERR)); 2167 } 2168 2169 /* 2170 * Advance the input to the first non-empty line, and determine 2171 * the mapfile version. The version is specified by the mapfile 2172 * using a $mapfile_version directive. The original System V 2173 * syntax lacks this directive, and we use that fact to identify 2174 * such files. SysV mapfile are implicitly defined to have version 1. 2175 * 2176 * entry: 2177 * ofl - Output file descriptor 2178 * mf - Mapfile block 2179 * 2180 * exit: 2181 * On success, updates mf->mf_version, and returns TRUE (1). 2182 * On failure, returns FALSE (0). 2183 */ 2184 static Boolean 2185 mapfile_version(Mapfile *mf) 2186 { 2187 char *line_start = mf->mf_next; 2188 Boolean cont = TRUE; 2189 Boolean status = TRUE; /* Assume success */ 2190 Token tok; 2191 2192 mf->mf_version = MFV_SYSV; 2193 2194 /* 2195 * Cycle through the characters looking for tokens. Although the 2196 * true version is not known yet, we use the v2 dispatch table. 2197 * It contains control directives, which we need for this search, 2198 * and the other TK_OP_ tokens we will recognize and act on are the 2199 * same for both tables. 2200 * 2201 * It is important not to process any tokens that would lead to 2202 * a non-OP token: 2203 * 2204 * - The version is required to interpret them 2205 * - Our mapfile descriptor is not fully initialized, 2206 * attempts to run that code will crash the program. 2207 */ 2208 while (cont) { 2209 /* Map the character to a dispatch action */ 2210 tok = (*mf->mf_next & 0x80) ? 
2211 TK_OP_ILLCHR : gettok_dispatch_v2[(unsigned)*mf->mf_next]; 2212 2213 switch (tok) { 2214 case TK_OP_WS: /* White space */ 2215 mf->mf_next++; 2216 break; 2217 2218 case TK_OP_NL: /* White space too, but bump line number. */ 2219 mf->mf_next++; 2220 mf->mf_lineno++; 2221 break; 2222 2223 case TK_OP_CMT: 2224 advance_to_eol(&mf->mf_next); 2225 break; 2226 2227 case TK_OP_CDIR: 2228 /* 2229 * Control directives are only valid at the start 2230 * of a line. However, as we have not yet seen 2231 * a token, we do not need to test for this, and 2232 * can safely assume that we are at the start. 2233 */ 2234 if (!strncasecmp(mf->mf_next, 2235 MSG_ORIG(MSG_STR_CDIR_MFVER), 2236 MSG_STR_CDIR_MFVER_SIZE) && 2237 isspace_nonl(*(mf->mf_next + 2238 MSG_STR_CDIR_MFVER_SIZE))) { 2239 ld_map_tkval_t ver; 2240 2241 mf->mf_next += MSG_STR_CDIR_MFVER_SIZE + 1; 2242 if (!ld_map_getint(mf, &ver, TRUE)) { 2243 status = cont = FALSE; 2244 break; 2245 } 2246 /* 2247 * Is it a valid version? Note that we 2248 * intentionally do not allow you to 2249 * specify version 1 using the $mapfile_version 2250 * syntax, because that's reserved to version 2251 * 2 and up. 2252 */ 2253 if ((ver.tkv_int.tkvi_value < 2) || 2254 (ver.tkv_int.tkvi_value >= MFV_NUM)) { 2255 const char *fmt; 2256 2257 fmt = (ver.tkv_int.tkvi_value < 2) ? 2258 MSG_INTL(MSG_MAP_CDIR_BADVDIR) : 2259 MSG_INTL(MSG_MAP_CDIR_BADVER); 2260 mf_fatal(mf, fmt, 2261 EC_WORD(ver.tkv_int.tkvi_value)); 2262 status = cont = FALSE; 2263 break; 2264 } 2265 mf->mf_version = ver.tkv_int.tkvi_value; 2266 cont = FALSE; /* Version recovered. All done */ 2267 break; 2268 } 2269 /* 2270 * Not a version directive. Reset the current position 2271 * to the start of the current line and stop here. 2272 * SysV syntax applies. 2273 */ 2274 mf->mf_next = line_start; 2275 cont = FALSE; 2276 break; 2277 2278 default: 2279 /* 2280 * If we see anything else, then stop at this point. 
2281 * The file has System V syntax (version 1), and the 2282 * next token should be interpreted as such. 2283 */ 2284 cont = FALSE; 2285 break; 2286 } 2287 } 2288 2289 return (status); 2290 } 2291 2292 /* 2293 * Parse the mapfile. 2294 */ 2295 Boolean 2296 ld_map_parse(const char *mapfile, Ofl_desc *ofl) 2297 { 2298 struct stat stat_buf; /* stat of mapfile */ 2299 int mapfile_fd; /* descriptor for mapfile */ 2300 int err; 2301 Mapfile *mf; /* Mapfile descriptor */ 2302 size_t name_len; /* strlen(mapfile) */ 2303 2304 /* 2305 * Determine if we're dealing with a file or a directory. 2306 */ 2307 if (stat(mapfile, &stat_buf) == -1) { 2308 err = errno; 2309 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_STAT), mapfile, 2310 strerror(err)); 2311 return (FALSE); 2312 } 2313 if (S_ISDIR(stat_buf.st_mode)) { 2314 DIR *dirp; 2315 struct dirent *denp; 2316 2317 /* 2318 * Open the directory and interpret each visible file as a 2319 * mapfile. 2320 */ 2321 if ((dirp = opendir(mapfile)) == NULL) 2322 return (TRUE); 2323 2324 while ((denp = readdir(dirp)) != NULL) { 2325 char path[PATH_MAX]; 2326 2327 /* 2328 * Ignore any hidden filenames. Construct the full 2329 * pathname to the new mapfile. 2330 */ 2331 if (*denp->d_name == '.') 2332 continue; 2333 (void) snprintf(path, PATH_MAX, MSG_ORIG(MSG_STR_PATH), 2334 mapfile, denp->d_name); 2335 if (!ld_map_parse(path, ofl)) 2336 return (FALSE); 2337 } 2338 (void) closedir(dirp); 2339 return (TRUE); 2340 } else if (!S_ISREG(stat_buf.st_mode)) { 2341 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_NOTREG), mapfile); 2342 return (FALSE); 2343 } 2344 2345 /* Open file */ 2346 if ((mapfile_fd = open(mapfile, O_RDONLY)) == -1) { 2347 err = errno; 2348 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN), mapfile, 2349 strerror(err)); 2350 return (FALSE); 2351 } 2352 2353 /* 2354 * Allocate enough memory to hold the state block, mapfile name, 2355 * and mapfile text. Text has alignment 1, so it can follow the 2356 * state block without padding. 
2357 */ 2358 name_len = strlen(mapfile) + 1; 2359 mf = libld_malloc(sizeof (*mf) + name_len + stat_buf.st_size + 1); 2360 if (mf == NULL) 2361 return (FALSE); 2362 mf->mf_ofl = ofl; 2363 mf->mf_name = (char *)(mf + 1); 2364 (void) strcpy(mf->mf_name, mapfile); 2365 mf->mf_text = mf->mf_name + name_len; 2366 if (read(mapfile_fd, mf->mf_text, stat_buf.st_size) != 2367 stat_buf.st_size) { 2368 err = errno; 2369 ld_eprintf(ofl, ERR_FATAL, MSG_INTL(MSG_SYS_READ), mapfile, 2370 strerror(err)); 2371 (void) close(mapfile_fd); 2372 return (FALSE); 2373 } 2374 (void) close(mapfile_fd); 2375 mf->mf_text[stat_buf.st_size] = '\0'; 2376 mf->mf_next = mf->mf_text; 2377 mf->mf_lineno = 1; 2378 mf->mf_next_ch = 0; /* No "lookahead" character yet */ 2379 mf->mf_ec_insndx = 0; /* Insert entrace criteria at top */ 2380 2381 /* 2382 * Read just enough from the mapfile to determine the version, 2383 * and then dispatch to the appropriate code for further processing 2384 */ 2385 if (!mapfile_version(mf)) 2386 return (FALSE); 2387 2388 /* 2389 * Start and continuation masks for unquoted identifier at this 2390 * mapfile version level. 2391 */ 2392 mf->mf_tkid_start = TKID_ATTR_START(mf->mf_version); 2393 mf->mf_tkid_cont = TKID_ATTR_CONT(mf->mf_version); 2394 2395 DBG_CALL(Dbg_map_parse(ofl->ofl_lml, mapfile, mf->mf_version)); 2396 2397 switch (mf->mf_version) { 2398 case MFV_SYSV: 2399 /* Guidance: Use newer mapfile syntax */ 2400 if (OFL_GUIDANCE(ofl, FLG_OFG_NO_MF)) 2401 ld_eprintf(ofl, ERR_GUIDANCE, 2402 MSG_INTL(MSG_GUIDE_MAPFILE), mapfile); 2403 2404 mf->mf_tokdisp = gettok_dispatch_v1; 2405 if (!ld_map_parse_v1(mf)) 2406 return (FALSE); 2407 break; 2408 2409 case MFV_SOLARIS: 2410 mf->mf_tokdisp = gettok_dispatch_v2; 2411 STACK_RESET(lms.lms_cdir_stack); 2412 2413 /* 2414 * If the conditional expression identifier tree has not been 2415 * initialized, set it up. 
This is only done on the first 2416 * mapfile, because the identifier control directives accumulate 2417 * across all the mapfiles. 2418 */ 2419 if ((lms.lms_cexp_id == NULL) && !cexp_ident_init()) 2420 return (FALSE); 2421 2422 /* 2423 * Tell ld_map_gettoken() we will accept a '$' as starting a 2424 * control directive on the first call. Normally, they are 2425 * only allowed after a newline. 2426 */ 2427 lms.lms_cdir_valid = 1; 2428 2429 if (!ld_map_parse_v2(mf)) 2430 return (FALSE); 2431 2432 /* Did we leave any open $if control directives? */ 2433 if (!STACK_IS_EMPTY(lms.lms_cdir_stack)) { 2434 while (!STACK_IS_EMPTY(lms.lms_cdir_stack)) { 2435 cdir_level_t *level = 2436 &STACK_POP(lms.lms_cdir_stack); 2437 2438 mf_fatal(mf, MSG_INTL(MSG_MAP_CDIR_NOEND), 2439 EC_LINENO(level->cdl_if_lineno)); 2440 } 2441 return (FALSE); 2442 } 2443 break; 2444 } 2445 2446 return (TRUE); 2447 } 2448 2449 /* 2450 * Sort the segment list. This is necessary if a mapfile has set explicit 2451 * virtual addresses for segments, or defined a SEGMENT_ORDER directive. 2452 * 2453 * Only PT_LOAD segments can be assigned a virtual address. These segments can 2454 * be one of two types: 2455 * 2456 * - Standard segments for text, data or bss. These segments will have been 2457 * inserted before the default text (first PT_LOAD) segment. 2458 * 2459 * - Empty (reservation) segments. These segment will have been inserted at 2460 * the end of any default PT_LOAD segments. 2461 * 2462 * Any standard segments that are assigned a virtual address will be sorted, 2463 * and as their definitions precede any default PT_LOAD segments, these segments 2464 * will be assigned sections before any defaults. 2465 * 2466 * Any reservation segments are also sorted amoung themselves, as these segments 2467 * must still follow the standard default segments. 
2468 */ 2469 static Boolean 2470 sort_seg_list(Ofl_desc *ofl) 2471 { 2472 APlist *sort_segs = NULL, *load_segs = NULL; 2473 Sg_desc *sgp1; 2474 Aliste idx1; 2475 Aliste nsegs; 2476 2477 2478 /* 2479 * We know the number of elements in the sorted list will be 2480 * the same as the original, so use this as the initial allocation 2481 * size for the replacement aplist. 2482 */ 2483 nsegs = aplist_nitems(ofl->ofl_segs); 2484 2485 2486 /* Add the items below SGID_TEXT to the list */ 2487 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) { 2488 if (sgp1->sg_id >= SGID_TEXT) 2489 break; 2490 2491 if (aplist_append(&sort_segs, sgp1, nsegs) == NULL) 2492 return (FALSE); 2493 } 2494 2495 /* 2496 * If there are any SEGMENT_ORDER items, add them, and set their 2497 * FLG_SG_ORDERED flag to identify them in debug output, and to 2498 * prevent them from being added again below. 2499 */ 2500 for (APLIST_TRAVERSE(ofl->ofl_segs_order, idx1, sgp1)) { 2501 if (aplist_append(&sort_segs, sgp1, nsegs) == NULL) 2502 return (FALSE); 2503 sgp1->sg_flags |= FLG_SG_ORDERED; 2504 } 2505 2506 /* 2507 * Add the loadable segments to another list in sorted order. 2508 */ 2509 DBG_CALL(Dbg_map_sort_title(ofl->ofl_lml, TRUE)); 2510 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) { 2511 DBG_CALL(Dbg_map_sort_seg(ofl->ofl_lml, ELFOSABI_SOLARIS, 2512 ld_targ.t_m.m_mach, sgp1)); 2513 2514 /* Only interested in PT_LOAD items not in SEGMENT_ORDER list */ 2515 if ((sgp1->sg_phdr.p_type != PT_LOAD) || 2516 (sgp1->sg_flags & FLG_SG_ORDERED)) 2517 continue; 2518 2519 /* 2520 * If the loadable segment does not contain a vaddr, simply 2521 * append it to the new list. 2522 */ 2523 if ((sgp1->sg_flags & FLG_SG_P_VADDR) == 0) { 2524 if (aplist_append(&load_segs, sgp1, AL_CNT_SEGMENTS) == 2525 NULL) 2526 return (FALSE); 2527 2528 } else { 2529 Aliste idx2; 2530 Sg_desc *sgp2; 2531 int inserted = 0; 2532 2533 /* 2534 * Traverse the segment list we are creating, looking 2535 * for a segment that defines a vaddr. 
2536 */ 2537 for (APLIST_TRAVERSE(load_segs, idx2, sgp2)) { 2538 /* 2539 * Any real segments that contain vaddr's need 2540 * to be sorted. Any reservation segments also 2541 * need to be sorted. However, any reservation 2542 * segments should be placed after any real 2543 * segments. 2544 */ 2545 if (((sgp2->sg_flags & 2546 (FLG_SG_P_VADDR | FLG_SG_EMPTY)) == 0) && 2547 (sgp1->sg_flags & FLG_SG_EMPTY)) 2548 continue; 2549 2550 if ((sgp2->sg_flags & FLG_SG_P_VADDR) && 2551 ((sgp2->sg_flags & FLG_SG_EMPTY) == 2552 (sgp1->sg_flags & FLG_SG_EMPTY))) { 2553 if (sgp1->sg_phdr.p_vaddr == 2554 sgp2->sg_phdr.p_vaddr) { 2555 ld_eprintf(ofl, ERR_FATAL, 2556 MSG_INTL(MSG_MAP_SEGSAME), 2557 sgp1->sg_name, 2558 sgp2->sg_name); 2559 return (FALSE); 2560 } 2561 2562 if (sgp1->sg_phdr.p_vaddr > 2563 sgp2->sg_phdr.p_vaddr) 2564 continue; 2565 } 2566 2567 /* 2568 * Insert this segment before the segment on 2569 * the load_segs list. 2570 */ 2571 if (aplist_insert(&load_segs, sgp1, 2572 AL_CNT_SEGMENTS, idx2) == NULL) 2573 return (FALSE); 2574 inserted = 1; 2575 break; 2576 } 2577 2578 /* 2579 * If the segment being inspected has not been inserted 2580 * in the segment list, simply append it to the list. 2581 */ 2582 if ((inserted == 0) && (aplist_append(&load_segs, 2583 sgp1, AL_CNT_SEGMENTS) == NULL)) 2584 return (FALSE); 2585 } 2586 } 2587 2588 /* 2589 * Add the sorted loadable segments to our initial segment list. 2590 */ 2591 for (APLIST_TRAVERSE(load_segs, idx1, sgp1)) { 2592 if (aplist_append(&sort_segs, sgp1, AL_CNT_SEGMENTS) == NULL) 2593 return (FALSE); 2594 } 2595 2596 /* 2597 * Add all other segments to our list. 
2598 */ 2599 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) { 2600 if ((sgp1->sg_id < SGID_TEXT) || 2601 (sgp1->sg_phdr.p_type == PT_LOAD) || 2602 (sgp1->sg_flags & FLG_SG_ORDERED)) 2603 continue; 2604 2605 if (aplist_append(&sort_segs, sgp1, AL_CNT_SEGMENTS) == NULL) 2606 return (FALSE); 2607 } 2608 2609 /* 2610 * Free the original list, and the pt_load list, and use 2611 * the new list as the segment list. 2612 */ 2613 free(ofl->ofl_segs); 2614 if (load_segs) free(load_segs); 2615 ofl->ofl_segs = sort_segs; 2616 2617 if (DBG_ENABLED) { 2618 Dbg_map_sort_title(ofl->ofl_lml, FALSE); 2619 for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp1)) { 2620 Dbg_map_sort_seg(ofl->ofl_lml, ELFOSABI_SOLARIS, 2621 ld_targ.t_m.m_mach, sgp1); 2622 } 2623 } 2624 2625 return (TRUE); 2626 } 2627 2628 /* 2629 * After all mapfiles have been processed, this routine is used to 2630 * finish any remaining mapfile related work. 2631 * 2632 * exit: 2633 * Returns TRUE on success, and FALSE on failure. 2634 */ 2635 Boolean 2636 ld_map_post_process(Ofl_desc *ofl) 2637 { 2638 Aliste idx, idx2; 2639 Is_desc *isp; 2640 Sg_desc *sgp; 2641 Ent_desc *enp; 2642 Sg_desc *first_seg = NULL; 2643 2644 2645 DBG_CALL(Dbg_map_post_title(ofl->ofl_lml)); 2646 2647 /* 2648 * Per-segment processing: 2649 * - Identify segments with explicit virtual address 2650 * - Details of input and output section order 2651 */ 2652 for (APLIST_TRAVERSE(ofl->ofl_segs, idx, sgp)) { 2653 /* 2654 * We are looking for segments. Program headers that represent 2655 * segments are required to have a non-NULL name pointer, 2656 * while that those that do not are required to have a 2657 * NULL name pointer. 2658 */ 2659 if (sgp->sg_name == NULL) 2660 continue; 2661 2662 /* Remember the first non-disabled segment */ 2663 if ((first_seg == NULL) && !(sgp->sg_flags & FLG_SG_DISABLED)) 2664 first_seg = sgp; 2665 2666 /* 2667 * If a segment has an explicit virtual address, we will 2668 * need to sort the segments. 
2669 */ 2670 if (sgp->sg_flags & FLG_SG_P_VADDR) 2671 ofl->ofl_flags1 |= FLG_OF1_VADDR; 2672 2673 /* 2674 * The FLG_OF_OS_ORDER flag enables the code that does 2675 * output section ordering. Set if the segment has 2676 * a non-empty output section order list. 2677 */ 2678 if (alist_nitems(sgp->sg_os_order) > 0) 2679 ofl->ofl_flags |= FLG_OF_OS_ORDER; 2680 2681 /* 2682 * The version 1 and version 2 syntaxes for input section 2683 * ordering are different and incompatible enough that we 2684 * only allow the use of one or the other for a given segment: 2685 * 2686 * v1) The version 1 syntax has the user set the ?O flag on 2687 * the segment. If this is done, all input sections placed 2688 * via an entrance criteria that has a section name are to 2689 * be sorted, using the order of the entrance criteria 2690 * as the sort key. 2691 * 2692 * v2) The version 2 syntax has the user specify a name for 2693 * the entry criteria, and then provide a list of entry 2694 * criteria names via the IS_ORDER segment attribute. 2695 * Sections placed via the criteria listed in IS_ORDER 2696 * are sorted, and the others are not. 2697 * 2698 * Regardless of the syntax version used, the section sorting 2699 * code expects the following: 2700 * 2701 * - Segments requiring input section sorting have the 2702 * FLG_SG_IS_ORDER flag set 2703 * 2704 * - Entrance criteria referencing the segment that 2705 * participate in input section sorting have a non-zero 2706 * sort key in their ec_ordndx field. 2707 * 2708 * At this point, the following are true: 2709 * 2710 * - All entrance criteria have ec_ordndx set to 0. 2711 * - Segments that require the version 1 behavior have 2712 * the FLG_SG_IS_ORDER flag set, and the segments 2713 * sg_is_order list is empty. 2714 * - Segments that require the version 2 behavior do not 2715 * have FLG_SG_IS_ORDER set, and the sg_is_order list is 2716 * non-empty. 
This list contains the names of the entrance 2717 * criteria that will participate in input section sorting, 2718 * and their relative order in the list provides the 2719 * sort key to use. 2720 * 2721 * We must detect these two cases, set the FLG_SG_IS_ORDER 2722 * flag as necessary, and fill in all entrance criteria 2723 * sort keys. If any input section sorting is to be done, 2724 * we also set the FLG_OF_IS_ORDER flag on the output descriptor 2725 * to enable the code that does that work. 2726 */ 2727 2728 /* Version 1: ?O flag? */ 2729 if (sgp->sg_flags & FLG_SG_IS_ORDER) { 2730 Word index = 0; 2731 2732 ofl->ofl_flags |= FLG_OF_IS_ORDER; 2733 DBG_CALL(Dbg_map_ent_ord_title(ofl->ofl_lml, 2734 sgp->sg_name)); 2735 2736 /* 2737 * Give each user defined entrance criteria for this 2738 * segment that specifies a section name a 2739 * monotonically increasing sort key. 2740 */ 2741 for (APLIST_TRAVERSE(ofl->ofl_ents, idx2, enp)) 2742 if ((enp->ec_segment == sgp) && 2743 (enp->ec_is_name != NULL) && 2744 ((enp->ec_flags & FLG_EC_BUILTIN) == 0)) 2745 enp->ec_ordndx = ++index; 2746 continue; 2747 } 2748 2749 /* Version 2: SEGMENT IS_ORDER list? */ 2750 if (aplist_nitems(sgp->sg_is_order) > 0) { 2751 Word index = 0; 2752 2753 ofl->ofl_flags |= FLG_OF_IS_ORDER; 2754 DBG_CALL(Dbg_map_ent_ord_title(ofl->ofl_lml, 2755 sgp->sg_name)); 2756 2757 /* 2758 * Give each entrance criteria in the sg_is_order 2759 * list a monotonically increasing sort key. 
2760 */ 2761 for (APLIST_TRAVERSE(sgp->sg_is_order, idx2, enp)) { 2762 enp->ec_ordndx = ++index; 2763 enp->ec_segment->sg_flags |= FLG_SG_IS_ORDER; 2764 } 2765 } 2766 } 2767 2768 /* Sort the segment descriptors if necessary */ 2769 if (((ofl->ofl_flags1 & FLG_OF1_VADDR) || 2770 (aplist_nitems(ofl->ofl_segs_order) > 0)) && 2771 !sort_seg_list(ofl)) 2772 return (FALSE); 2773 2774 /* 2775 * If the output file is a static file without an interpreter, and 2776 * if any virtual address is specified, then set the NOHDR flag for 2777 * backward compatibility. 2778 */ 2779 if (!(ofl->ofl_flags & (FLG_OF_DYNAMIC | FLG_OF_RELOBJ)) && 2780 !(ofl->ofl_osinterp) && (ofl->ofl_flags1 & FLG_OF1_VADDR)) 2781 ofl->ofl_dtflags_1 |= DF_1_NOHDR; 2782 2783 if (ofl->ofl_flags & FLG_OF_RELOBJ) { 2784 /* 2785 * NOHDR has no effect on a relocatable file. 2786 * Make sure this flag isn't set. 2787 */ 2788 ofl->ofl_dtflags_1 &= ~DF_1_NOHDR; 2789 } else if (first_seg != NULL) { 2790 /* 2791 * DF_1_NOHDR might have been set globally by the HDR_NOALLOC 2792 * directive. If not, then we want to check the per-segment 2793 * flag for the first loadable segment and propagate it 2794 * if set. 2795 */ 2796 if ((ofl->ofl_dtflags_1 & DF_1_NOHDR) == 0) { 2797 /* 2798 * If we sorted the segments, the first segment 2799 * may have changed. 2800 */ 2801 if ((ofl->ofl_flags1 & FLG_OF1_VADDR) || 2802 (aplist_nitems(ofl->ofl_segs_order) > 0)) { 2803 for (APLIST_TRAVERSE(ofl->ofl_segs, idx, sgp)) { 2804 if (sgp->sg_name == NULL) 2805 continue; 2806 if ((sgp->sg_flags & FLG_SG_DISABLED) == 2807 0) { 2808 first_seg = sgp; 2809 break; 2810 } 2811 } 2812 } 2813 2814 /* 2815 * If the per-segment NOHDR flag is set on our first 2816 * segment, then make it take effect. 
2817 */ 2818 if (first_seg->sg_flags & FLG_SG_NOHDR) 2819 ofl->ofl_dtflags_1 |= DF_1_NOHDR; 2820 } 2821 2822 /* 2823 * For executable and shared objects, the first segment must 2824 * be loadable unless NOHDR was specified, because the ELF 2825 * header must simultaneously lie at offset 0 of the file and 2826 * be included in the first loadable segment. This isn't 2827 * possible if some other segment type starts the file 2828 */ 2829 if (!(ofl->ofl_dtflags_1 & DF_1_NOHDR) && 2830 (first_seg->sg_phdr.p_type != PT_LOAD)) { 2831 Conv_inv_buf_t inv_buf; 2832 2833 ld_eprintf(ofl, ERR_FATAL, 2834 MSG_INTL(MSG_SEG_FIRNOTLOAD), 2835 conv_phdr_type(ELFOSABI_SOLARIS, ld_targ.t_m.m_mach, 2836 first_seg->sg_phdr.p_type, 0, &inv_buf), 2837 first_seg->sg_name); 2838 return (FALSE); 2839 } 2840 } 2841 2842 /* 2843 * Mapfiles may have been used to create symbol definitions 2844 * with backing storage. Although the backing storage is 2845 * associated with an input section, the association of the 2846 * section to an output section (and segment) is initially 2847 * deferred. Now that all mapfile processing is complete, any 2848 * entrance criteria requirements have been processed, and 2849 * these backing storage sections can be associated with the 2850 * appropriate output section (and segment). 2851 */ 2852 if (ofl->ofl_maptext || ofl->ofl_mapdata) 2853 DBG_CALL(Dbg_sec_backing(ofl->ofl_lml)); 2854 2855 for (APLIST_TRAVERSE(ofl->ofl_maptext, idx, isp)) { 2856 if (ld_place_section(ofl, isp, NULL, 2857 ld_targ.t_id.id_text, NULL) == (Os_desc *)S_ERROR) 2858 return (FALSE); 2859 } 2860 2861 for (APLIST_TRAVERSE(ofl->ofl_mapdata, idx, isp)) { 2862 if (ld_place_section(ofl, isp, NULL, 2863 ld_targ.t_id.id_data, NULL) == (Os_desc *)S_ERROR) 2864 return (FALSE); 2865 } 2866 2867 return (TRUE); 2868 } 2869