1 /* $Id: chars.c,v 1.58 2014/07/23 15:00:08 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "mandoc_aux.h" 29 #include "libmandoc.h" 30 31 #define PRINT_HI 126 32 #define PRINT_LO 32 33 34 struct ln { 35 struct ln *next; 36 const char *code; 37 const char *ascii; 38 int unicode; 39 }; 40 41 #define LINES_MAX 330 42 43 #define CHAR(in, ch, code) \ 44 { NULL, (in), (ch), (code) }, 45 46 #define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 47 #define CHAR_TBL_END }; 48 49 #include "chars.in" 50 51 struct mchars { 52 struct ln **htab; 53 }; 54 55 static const struct ln *find(const struct mchars *, 56 const char *, size_t); 57 58 59 void 60 mchars_free(struct mchars *arg) 61 { 62 63 free(arg->htab); 64 free(arg); 65 } 66 67 struct mchars * 68 mchars_alloc(void) 69 { 70 struct mchars *tab; 71 struct ln **htab; 72 struct ln *pp; 73 int i, hash; 74 75 /* 76 * Constructs a very basic chaining hashtable. The hash routine 77 * is simply the integral value of the first character. 78 * Subsequent entries are chained in the order they're processed. 79 */ 80 81 tab = mandoc_malloc(sizeof(struct mchars)); 82 htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *)); 83 84 for (i = 0; i < LINES_MAX; i++) { 85 hash = (int)lines[i].code[0] - PRINT_LO; 86 87 if (NULL == (pp = htab[hash])) { 88 htab[hash] = &lines[i]; 89 continue; 90 } 91 92 for ( ; pp->next; pp = pp->next) 93 /* Scan ahead. */ ; 94 pp->next = &lines[i]; 95 } 96 97 tab->htab = htab; 98 return(tab); 99 } 100 101 int 102 mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) 103 { 104 const struct ln *ln; 105 106 ln = find(arg, p, sz); 107 if (NULL == ln) 108 return(-1); 109 return(ln->unicode); 110 } 111 112 char 113 mchars_num2char(const char *p, size_t sz) 114 { 115 int i; 116 117 if ((i = mandoc_strntoi(p, sz, 10)) < 0) 118 return('\0'); 119 120 return(i > 0 && i < 256 && isprint(i) ? i : '\0'); 121 } 122 123 int 124 mchars_num2uc(const char *p, size_t sz) 125 { 126 int i; 127 128 if ((i = mandoc_strntoi(p, sz, 16)) < 0) 129 return('\0'); 130 131 /* 132 * Security warning: 133 * Never extend the range of accepted characters 134 * to overlap with the ASCII range, 0x00-0x7F 135 * without re-auditing the callers of this function. 136 * Some callers might relay on the fact that we never 137 * return ASCII characters for their escaping decisions. 138 * 139 * XXX Code is missing here to exclude bogus ranges. 140 */ 141 142 return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); 143 } 144 145 const char * 146 mchars_spec2str(const struct mchars *arg, 147 const char *p, size_t sz, size_t *rsz) 148 { 149 const struct ln *ln; 150 151 ln = find(arg, p, sz); 152 if (NULL == ln) { 153 *rsz = 1; 154 return(NULL); 155 } 156 157 *rsz = strlen(ln->ascii); 158 return(ln->ascii); 159 } 160 161 static const struct ln * 162 find(const struct mchars *tab, const char *p, size_t sz) 163 { 164 const struct ln *pp; 165 int hash; 166 167 assert(p); 168 169 if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) 170 return(NULL); 171 172 hash = (int)p[0] - PRINT_LO; 173 174 for (pp = tab->htab[hash]; pp; pp = pp->next) 175 if (0 == strncmp(pp->code, p, sz) && 176 '\0' == pp->code[(int)sz]) 177 return(pp); 178 179 return(NULL); 180 } 181