1 /* 2 * Copyright (C) 1984-2002 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Functions to define the character set 14 * and do things specific to the character set. 15 */ 16 17 #include "less.h" 18 #if HAVE_LOCALE 19 #include <locale.h> 20 #include <ctype.h> 21 #endif 22 23 public int utf_mode = 0; 24 25 #if !SMALL 26 /* 27 * Predefined character sets, 28 * selected by the LESSCHARSET environment variable. 29 */ 30 struct charset { 31 char *name; 32 int *p_flag; 33 char *desc; 34 } charsets[] = { 35 { "ascii", NULL, "8bcccbcc18b95.b" }, 36 { "dos", NULL, "8bcccbcc12bc5b223.b" }, 37 { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, 38 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, 39 { "iso8859", NULL, "8bcccbcc18b95.33b." }, 40 { "koi8-r", NULL, "8bcccbcc18b95.b128." }, 41 { "next", NULL, "8bcccbcc18b95.bb125.bb" }, 42 { "utf-8", &utf_mode, "8bcccbcc18b." }, 43 { NULL, NULL, NULL } 44 }; 45 46 struct cs_alias { 47 char *name; 48 char *oname; 49 } cs_aliases[] = { 50 { "latin1", "iso8859" }, 51 { "latin9", "iso8859" }, 52 { NULL, NULL } 53 }; 54 55 #define IS_BINARY_CHAR 01 56 #define IS_CONTROL_CHAR 02 57 58 static char chardef[256]; 59 static char *binfmt = NULL; 60 public int binattr = AT_STANDOUT; 61 62 63 /* 64 * Define a charset, given a description string. 65 * The string consists of 256 letters, 66 * one for each character in the charset. 67 * If the string is shorter than 256 letters, missing letters 68 * are taken to be identical to the last one. 69 * A decimal number followed by a letter is taken to be a 70 * repetition of the letter. 71 * 72 * Each letter is one of: 73 * . normal character 74 * b binary character 75 * c control character 76 */ 77 static void 78 ichardef(s) 79 char *s; 80 { 81 register char *cp; 82 register int n; 83 register char v; 84 85 n = 0; 86 v = 0; 87 cp = chardef; 88 while (*s != '\0') 89 { 90 switch (*s++) 91 { 92 case '.': 93 v = 0; 94 break; 95 case 'c': 96 v = IS_CONTROL_CHAR; 97 break; 98 case 'b': 99 v = IS_BINARY_CHAR|IS_CONTROL_CHAR; 100 break; 101 102 case '0': case '1': case '2': case '3': case '4': 103 case '5': case '6': case '7': case '8': case '9': 104 n = (10 * n) + (s[-1] - '0'); 105 continue; 106 107 default: 108 error("invalid chardef", NULL_PARG); 109 quit(QUIT_ERROR); 110 /*NOTREACHED*/ 111 } 112 113 do 114 { 115 if (cp >= chardef + sizeof(chardef)) 116 { 117 error("chardef longer than 256", NULL_PARG); 118 quit(QUIT_ERROR); 119 /*NOTREACHED*/ 120 } 121 *cp++ = v; 122 } while (--n > 0); 123 n = 0; 124 } 125 126 while (cp < chardef + sizeof(chardef)) 127 *cp++ = v; 128 } 129 130 /* 131 * Define a charset, given a charset name. 132 * The valid charset names are listed in the "charsets" array. 133 */ 134 static int 135 icharset(name) 136 register char *name; 137 { 138 register struct charset *p; 139 register struct cs_alias *a; 140 141 if (name == NULL || *name == '\0') 142 return (0); 143 144 /* First see if the name is an alias. */ 145 for (a = cs_aliases; a->name != NULL; a++) 146 { 147 if (strcmp(name, a->name) == 0) 148 { 149 name = a->oname; 150 break; 151 } 152 } 153 154 for (p = charsets; p->name != NULL; p++) 155 { 156 if (strcmp(name, p->name) == 0) 157 { 158 ichardef(p->desc); 159 if (p->p_flag != NULL) 160 *(p->p_flag) = 1; 161 return (1); 162 } 163 } 164 165 error("invalid charset name", NULL_PARG); 166 quit(QUIT_ERROR); 167 /*NOTREACHED*/ 168 return (0); 169 } 170 171 #if HAVE_LOCALE 172 /* 173 * Define a charset, given a locale name. 174 */ 175 static void 176 ilocale() 177 { 178 register int c; 179 180 setlocale(LC_ALL, ""); 181 for (c = 0; c < (int) sizeof(chardef); c++) 182 { 183 if (isprint(c)) 184 chardef[c] = 0; 185 else if (iscntrl(c)) 186 chardef[c] = IS_CONTROL_CHAR; 187 else 188 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; 189 } 190 } 191 #endif 192 193 /* 194 * Define the printing format for control chars. 195 */ 196 public void 197 setbinfmt(s) 198 char *s; 199 { 200 if (s == NULL || *s == '\0') 201 s = "*s<%X>"; 202 /* 203 * Select the attributes if it starts with "*". 204 */ 205 if (*s == '*') 206 { 207 switch (s[1]) 208 { 209 case 'd': binattr = AT_BOLD; break; 210 case 'k': binattr = AT_BLINK; break; 211 case 's': binattr = AT_STANDOUT; break; 212 case 'u': binattr = AT_UNDERLINE; break; 213 default: binattr = AT_NORMAL; break; 214 } 215 s += 2; 216 } 217 binfmt = s; 218 } 219 220 /* 221 * Initialize charset data structures. 222 */ 223 public void 224 init_charset() 225 { 226 register char *s; 227 228 s = lgetenv("LESSBINFMT"); 229 setbinfmt(s); 230 231 /* 232 * See if environment variable LESSCHARSET is defined. 233 */ 234 s = lgetenv("LESSCHARSET"); 235 if (icharset(s)) 236 return; 237 /* 238 * LESSCHARSET is not defined: try LESSCHARDEF. 239 */ 240 s = lgetenv("LESSCHARDEF"); 241 if (s != NULL && *s != '\0') 242 { 243 ichardef(s); 244 return; 245 } 246 247 #if HAVE_STRSTR 248 /* 249 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. 250 */ 251 if ((s = lgetenv("LC_ALL")) != NULL || 252 (s = lgetenv("LC_CTYPE")) != NULL || 253 (s = lgetenv("LANG")) != NULL) 254 { 255 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) 256 if (icharset("utf-8")) 257 return; 258 } 259 #endif 260 261 #if HAVE_LOCALE 262 /* 263 * Use setlocale. 264 */ 265 ilocale(); 266 #else 267 #if MSDOS_COMPILER 268 /* 269 * Default to "dos". 270 */ 271 (void) icharset("dos"); 272 #else 273 /* 274 * Default to "latin1". 275 */ 276 (void) icharset("latin1"); 277 #endif 278 #endif 279 } 280 281 /* 282 * Is a given character a "binary" character? 283 */ 284 public int 285 binary_char(c) 286 unsigned char c; 287 { 288 c &= 0377; 289 return (chardef[c] & IS_BINARY_CHAR); 290 } 291 292 /* 293 * Is a given character a "control" character? 294 */ 295 public int 296 control_char(c) 297 int c; 298 { 299 c &= 0377; 300 return (chardef[c] & IS_CONTROL_CHAR); 301 } 302 303 /* 304 * Return the printable form of a character. 305 * For example, in the "ascii" charset '\3' is printed as "^C". 306 */ 307 public char * 308 prchar(c) 309 int c; 310 { 311 static char buf[8]; 312 313 c &= 0377; 314 if (!control_char(c)) 315 snprintf(buf, sizeof(buf), "%c", c); 316 else if (c == ESC) 317 snprintf(buf, sizeof(buf), "ESC"); 318 #if IS_EBCDIC_HOST 319 else if (!binary_char(c) && c < 64) 320 snprintf(buf, sizeof(buf), "^%c", 321 /* 322 * This array roughly inverts CONTROL() #defined in less.h, 323 * and should be kept in sync with CONTROL() and IBM-1047. 324 */ 325 "@ABC.I.?...KLMNO" 326 "PQRS.JH.XY.." 327 "\\]^_" 328 "......W[.....EFG" 329 "..V....D....TU.Z"[c]); 330 #else 331 else if (c < 128 && !control_char(c ^ 0100)) 332 snprintf(buf, sizeof(buf), "^%c", c ^ 0100); 333 #endif 334 else 335 snprintf(buf, sizeof(buf), binfmt, c); 336 return (buf); 337 } 338 339 #else /* SMALL */ 340 341 public int binattr = AT_STANDOUT; 342 343 public void 344 init_charset() 345 { 346 return; 347 } 348 349 /* 350 * Is a given character a "binary" character? 351 */ 352 public int 353 binary_char(c) 354 unsigned char c; 355 { 356 return (!isprint(c) && !isspace(c)); 357 } 358 359 /* 360 * Is a given character a "control" character? 361 */ 362 public int 363 control_char(c) 364 int c; 365 { 366 return (iscntrl(c)); 367 } 368 369 /* 370 * Return the printable form of a character. 371 * For example, in the "ascii" charset '\3' is printed as "^C". 372 */ 373 public char * 374 prchar(c) 375 int c; 376 { 377 static char buf[8]; 378 379 c &= 0377; 380 if (!iscntrl(c)) 381 snprintf(buf, sizeof(buf), "%c", c); 382 else if (c == ESC) 383 snprintf(buf, sizeof(buf), "ESC"); 384 else if (c < 128 && !iscntrl(c ^ 0100)) 385 snprintf(buf, sizeof(buf), "^%c", c ^ 0100); 386 else 387 snprintf(buf, sizeof(buf), "*s<%X>", c); 388 return (buf); 389 } 390 #endif /* SMALL */ 391