;;; characters.el --- set syntax and category for multibyte characters  -*- lexical-binding: t; -*-

;; Copyright (C) 1997, 2000-2021 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021
;; Copyright (C) 2003
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009

;; Keywords: multibyte character, character set, syntax, category

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;;; Code:

;;; Predefined categories.

;; For each character set.
;; Each table below pairs a category mnemonic with its docstring.  The
;; first line of a docstring is the short name of the category; any
;; further lines are its longer description.
(dolist (entry
         '((?a . "ASCII
ASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
           (?l . "Latin")
           (?t . "Thai")
           (?g . "Greek")
           (?b . "Arabic")
           (?w . "Hebrew")
           (?y . "Cyrillic")
           (?k . "Katakana
Japanese katakana")
           (?r . "Roman
Japanese roman")
           (?c . "Chinese")
           (?j . "Japanese")
           (?h . "Korean")
           (?e . "Ethiopic
Ethiopic (Ge'ez)")
           (?v . "Viet
Vietnamese")
           (?i . "Indian")
           (?o . "Lao")
           (?q . "Tibetan")))
  (define-category (car entry) (cdr entry)))

;; For each group (row) of 2-byte character sets.

(dolist (entry
         '((?A . "2-byte alnum
Alphanumeric characters of 2-byte character sets")
           (?C . "2-byte han
Chinese (Han) characters of 2-byte character sets")
           (?G . "2-byte Greek
Greek characters of 2-byte character sets")
           (?H . "2-byte Hiragana
Japanese Hiragana characters of 2-byte character sets")
           (?K . "2-byte Katakana
Japanese Katakana characters of 2-byte character sets")
           (?N . "2-byte Korean
Korean Hangul characters of 2-byte character sets")
           (?Y . "2-byte Cyrillic
Cyrillic characters of 2-byte character sets")
           (?I . "Indian Glyphs")))
  (define-category (car entry) (cdr entry)))

;; For phonetic classifications.

(dolist (entry
         '((?0 . "consonant")
           (?1 . "base vowel
Base (independent) vowel")
           (?2 . "upper diacritic
Upper diacritical mark (including upper vowel)")
           (?3 . "lower diacritic
Lower diacritical mark (including lower vowel)")
           (?4 . "combining tone
Combining tone mark")
           (?5 . "symbol")
           (?6 . "digit")
           (?7 . "vowel diacritic
Vowel-modifying diacritical mark")
           (?8 . "vowel-signs")
           (?9 . "semivowel lower")))
  (define-category (car entry) (cdr entry)))

(dolist (entry
         '(;; For filling.
           (?| . "line breakable
While filling, we can break a line at this character.")
           ;; For indentation calculation.
           (?\s . "space for indent
This character counts as a space for indentation purposes.")
           ;; Keep the following for `kinsoku' processing.  See comments
           ;; in kinsoku.el.
           (?> . "Not at bol
A character which can't be placed at beginning of line.")
           (?< . "Not at eol
A character which can't be placed at end of line.")
           ;; Base and Combining
           (?. . "Base
Base characters (Unicode General Category L,N,P,S,Zs)")
           (?^ . "Combining
Combining diacritic or mark (Unicode General Category M)")
           ;; bidi types
           (?R . "Strong R2L
Characters with \"strong\" right-to-left directionality, i.e.
with R, AL, RLE, or RLO Unicode bidi character type.")
           (?L . "Strong L2R
Characters with \"strong\" left-to-right directionality, i.e.
with L, LRE, or LRO Unicode bidi character type.")))
  (define-category (car entry) (cdr entry)))


;;; Setting syntax and category.

;; ASCII

;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
(dolist (category '(?a ?l))
  (modify-category-entry '(32 . 127) category))

;; Deal with the CJK charsets first.
;; Since the syntax of blocks is defined per charset, and the charsets
;; may contain e.g. Latin characters, we end up with the wrong syntax
;; definitions if we're not careful.

;; Chinese characters (Unicode)
;; Categories used here: `|' means a line may be broken at the character
;; while filling, `C' is "2-byte han" and `c' is "Chinese".
(modify-category-entry '(#x2E80 . #x312F) ?|)
(modify-category-entry '(#x3190 . #x33FF) ?|)
(modify-category-entry '(#x3400 . #x4DB5) ?C)
(modify-category-entry '(#x4E00 . #x9FD5) ?C)
(modify-category-entry '(#x3400 . #x9FD5) ?c)
(modify-category-entry '(#x3400 . #x9FD5) ?|)
(modify-category-entry '(#xF900 . #xFAFF) ?C)
(modify-category-entry '(#xF900 . #xFAFF) ?c)
(modify-category-entry '(#xF900 . #xFAFF) ?|)
(modify-category-entry '(#x1B170 . #x1B2FF) ?c)
(modify-category-entry '(#x20000 . #x2FFFF) ?|)
(modify-category-entry '(#x20000 . #x2FFFF) ?C)
(modify-category-entry '(#x20000 . #x2FFFF) ?c)


;; Chinese character set (GB2312)

;; Symbol syntax for three code ranges of the charset.
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
(map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)

;; Every character is `c'; individual code ranges additionally get the
;; matching "2-byte ..." category.
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)

;; Chinese character set (BIG5)

(map-charset-chars #'modify-category-entry 'big5 ?c)
(map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA261)
(map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
(map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DC)

;; Chinese character set (CNS11643)

(dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
             chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
             chinese-cns11643-7))
  (map-charset-chars #'modify-category-entry c ?c)
  ;; In plane 1 only the code range #x4421-#x7E7E gets `C'; in the other
  ;; planes every character does.
  (if (eq c 'chinese-cns11643-1)
      (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
    (map-charset-chars #'modify-category-entry c ?C)))

;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)

(map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)

(map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)

(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
             japanese-jisx0213-1 japanese-jisx0213-2
             japanese-jisx0213.2004-1
             cp932-2-byte))
  (map-charset-chars #'modify-category-entry l ?j))

;; Fullwidth characters
(modify-category-entry '(#xff01 . #xff60) ?\|)

;; Unicode equivalents of JISX0201-kana
(let ((range '(#xff61 . #xff9f)))
  (modify-category-entry range ?k)
  (modify-category-entry range ?j)
  (modify-category-entry range ?\|))

;; Katakana block
(modify-category-entry '(#x3099 . #x309C) ?K)
(modify-category-entry '(#x30A0 . #x30FF) ?K)
(modify-category-entry '(#x31F0 . #x31FF) ?K)
(modify-category-entry '(#x30A0 . #x30FA) ?\|)
(modify-category-entry #x30FF ?\|)
(modify-category-entry '(#x1AFF0 . #x1B000) ?K)
(modify-category-entry '(#x1B120 . #x1B122) ?K)
(modify-category-entry '(#x1B164 . #x1B167) ?K)

;; Hiragana block
(modify-category-entry '(#x3040 . #x309F) ?H)
(modify-category-entry '(#x3040 . #x3096) ?\|)
(modify-category-entry #x309F ?\|)
(modify-category-entry #x30A0 ?H)
(modify-category-entry #x30FC ?H)
(modify-category-entry #x1B001 ?H)
(modify-category-entry #x1B11F ?H)
(modify-category-entry '(#x1B150 . #x1B152) ?H)
(modify-category-entry '(#x1B002 . #x1B11E) ?H) ; Hentaigana

(modify-category-entry '(#x1AFF0 . #x1B1FF) ?j)


;; JISX0208
;; Note: Some of these have their syntax updated later below.
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
;; These characters fall in the symbol ranges above but should be word
;; constituents.
(dolist (ch '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))
  (modify-syntax-entry ch "w"))

(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
(map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
(dolist (ch '(?仝 ?々 ?〆 ?〇))
  (modify-category-entry ch ?C))

;; JISX0212

(map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)

;; JISX0201-Kana

(dolist (ch '(?。 ?、 ?・))
  (modify-syntax-entry ch "."))

;; The opening bracket gets open-parenthesis syntax and the closing one
;; close-parenthesis syntax, each naming the other as its match.  (The
;; closing bracket used to be given "(" syntax as well.)
(modify-syntax-entry ?\「 "(」")
(modify-syntax-entry ?\」 ")「")

;; Korean character set (KSC5601)

(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)

(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
(map-charset-chars
#'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
(map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x2975)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)

;; These are in more than one charset.
;; PARENS is a flat sequence of open/close pairs.  Each opener gets
;; open-parenthesis syntax matching its closer, and vice versa.
;; The last group is the fullwidth forms U+FF08 U+FF09 U+FF3B U+FF3D
;; U+FF5B U+FF5D.  They are written as escapes because width
;; normalization silently turns the literal characters into ASCII
;; ()[]{}.
(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
                      "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
                      "\uFF08\uFF09\uFF3B\uFF3D\uFF5B\uFF5D")))
  (dotimes (i (/ (length parens) 2))
    (let ((open (aref parens (* i 2)))
          (close (aref parens (1+ (* i 2)))))
      (modify-syntax-entry open (format "(%c" close))
      (modify-syntax-entry close (format ")%c" open)))))

;; Arabic character set

(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(modify-category-entry '(#x600 . #x6ff) ?b)
(modify-category-entry '(#x870 . #x8ff) ?b)
(modify-category-entry '(#xfb50 . #xfdff) ?b)
(modify-category-entry '(#xfe70 . #xfefe) ?b)

;; Cyrillic character set (ISO-8859-5)

(modify-syntax-entry ?№ ".")

;; Ethiopic character set

(modify-category-entry '(#x1200 . #x1399) ?e)
(modify-category-entry '(#x2D80 . #x2DDE) ?e)
(modify-category-entry '(#xAB01 . #xAB2E) ?e)
(modify-category-entry '(#x1E7E0 . #x1E7FE) ?e)
;; These Ethiopic characters get punctuation syntax.
(dolist (ch '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨))
  (modify-syntax-entry ch "."))
(map-charset-chars #'modify-category-entry 'ethiopic ?e)

;; Hebrew character set (ISO-8859-8)

(modify-syntax-entry #x5be ".") ; MAQAF
(modify-syntax-entry #x5c0 ".") ; PASEQ
(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
(modify-syntax-entry #x5c6 ".") ; NUN HAFUKHA
(modify-syntax-entry #x5f3 ".") ; GERESH
(modify-syntax-entry #x5f4 ".") ; GERSHAYIM

;; Indian character set (IS 13194 and other Emacs original Indian charsets)

(modify-category-entry '(#x901 . #x970) ?i)
(map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
(map-charset-chars #'modify-category-entry 'indian-2-column ?i)

;; Lao character set

(modify-category-entry '(#xe80 . #xeff) ?o)
(map-charset-chars #'modify-category-entry 'lao ?o)

;; Each DEFLIST element is (CHARS SYNTAX CATEGORY).  CHARS is a string
;; of single characters and ranges written "A-B".  Every character
;; named gets CATEGORY, and also SYNTAX unless SYNTAX is "w" (in which
;; case the syntax table is left untouched).
(let ((deflist '(("ກ-ຮ" "w" ?0)       ; consonant
                 ("ະາຳຽເ-ໄ" "w" ?1)  ; vowel base
                 ("ັິ-ືົໍ" "w" ?2)     ; vowel upper
                 ("ຸູ" "w" ?3)        ; vowel lower
                 ("່-໋" "w" ?4)       ; tone mark
                 ("ຼຽ" "w" ?9)        ; semivowel lower
                 ("໐-໙" "w" ?6)       ; digit
                 ("ຯໆ" "_" ?5)        ; symbol
                 )))
  (dolist (elm deflist)
    (let* ((chars (car elm))
           (syntax (nth 1 elm))
           (category (nth 2 elm))
           (len (length chars))
           (i 0)
           ch to)
      (while (< i len)
        (if (= (aref chars i) ?-)
            ;; "A-B": A was already handled on the previous step and CH
            ;; now points just past it, so continue from there up to B.
            (setq i (1+ i)
                  to (aref chars i))
          (setq ch (aref chars i)
                to ch))
        (while (<= ch to)
          (unless (string-equal syntax "w")
            (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq i (1+ i))))))

;; Thai character set (TIS620)

(modify-category-entry '(#xe00 .
#xe7f) ?t)
(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)

;; Each table entry is (CHARS SYNTAX CATEGORY); CHARS lists single
;; characters and "A-B" ranges.  Every character gets CATEGORY, and
;; SYNTAX as well unless SYNTAX is "w".
(let ((thai-table '(;; chars syntax category
                    ("ก-รลว-ฮ" "w" ?0)   ; consonant
                    ("ฤฦะาำเ-ๅ" "w" ?1)  ; vowel base
                    ("ัิ-ื็๎" "w" ?2)      ; vowel upper
                    ("ุ-ฺ" "w" ?3)        ; vowel lower
                    ("่-ํ" "w" ?4)        ; tone mark
                    ("๐-๙" "w" ?6)       ; digit
                    ("ฯๆ฿๏๚๛" "_" ?5)    ; symbol
                    )))
  (dolist (entry thai-table)
    (let* ((spec (car entry))
           (syn (nth 1 entry))
           (cat (nth 2 entry))
           (n (length spec))
           (pos 0)
           cur last)
      (while (< pos n)
        (if (= (aref spec pos) ?-)
            ;; Range: CUR already sits one past the range start.
            (setq pos (1+ pos)
                  last (aref spec pos))
          (setq cur (aref spec pos)
                last cur))
        (while (<= cur last)
          (unless (string-equal syn "w")
            (modify-syntax-entry cur syn))
          (modify-category-entry cur cat)
          (setq cur (1+ cur)))
        (setq pos (1+ pos))))))

;; Tibetan character set

(modify-category-entry '(#xf00 . #xfff) ?q)
(map-charset-chars #'modify-category-entry 'tibetan ?q)
(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)

;; Same table format as for Thai above.
;; NOTE(review): some strings below (e.g. the range end points of the
;; first two entries) contain characters that Unicode normalization
;; may split into sequences; confirm the code points against a known
;; good copy of this file.
(let ((tibetan-table '(;; chars syntax category
                       ("ཀ-ཀྵཪ" "w" ?0)          ; consonant
                       ("ྐ-ྐྵྺྻྼ" "w" ?0)        ;
                       ("ིེཻོཽྀ" "w" ?2)          ; upper vowel
                       ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2)     ; upper modifier
                       ("྄ཱུ༙༵༷" "w" ?3)         ; lower vowel/modifier
                       ("" "w" ?3)              ; invisible vowel a
                       ("༠-༩༪-༳" "w" ?6)        ; digit
                       ("་།-༒༔ཿ" "." ?|)        ; line-break char
                       ("་།༏༐༑༔ཿ" "." ?|)      ;
                       ("༈་།-༒༔ཿ༽༴" "." ?>)    ; prohibition
                       ("་།༏༐༑༔ཿ" "." ?>)      ;
                       ("ༀ-༊༼࿁࿂྅" "." ?<)     ; prohibition
                       ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
                       )))
  (dolist (entry tibetan-table)
    (let* ((spec (car entry))
           (syn (nth 1 entry))
           (cat (nth 2 entry))
           (n (length spec))
           (pos 0)
           cur last)
      (while (< pos n)
        (if (= (aref spec pos) ?-)
            (setq pos (1+ pos)
                  last (aref spec pos))
          (setq cur (aref spec pos)
                last cur))
        (while (<= cur last)
          (unless (string-equal syn "w")
            (modify-syntax-entry cur syn))
          (modify-category-entry cur cat)
          (setq cur (1+ cur)))
        (setq pos (1+ pos))))))

;; Vietnamese character set

;; To make a word with Latin characters
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (map-charset-chars #'modify-category-entry charset ?l)
  (map-charset-chars #'modify-category-entry charset ?v))

;; For code points 32..127 of the two VISCII charsets, register the
;; upper/lower characters as a case pair and give their Unicode
;; equivalents the category `v'.
(let ((case-tbl (standard-case-table)))
  (dotimes (offset 96)
    (let* ((code (+ 32 offset))
           (upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (upper-ucs (encode-char upper 'ucs))
           (lower-ucs (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower case-tbl)
      (when upper-ucs (modify-category-entry upper-ucs ?v))
      (when lower-ucs (modify-category-entry lower-ucs ?v)))))

;; Tai Viet
(let ((deflist '(;; chars syntax category
                 ((?ꪀ . ?ꪯ) "w" ?0)    ; consonant
                 ("ꪱꪵꪶ" "w" ?1)        ; vowel base
                 ((?ꪹ . ?ꪽ) "w" ?1)    ; vowel base
                 ("ꪰꪲꪳꪷꪸꪾ" "w" ?2)     ; vowel upper
                 ("ꪴ" "w" ?3)          ; vowel lower
                 ("ꫀꫂ" "w" ?1)         ; non-combining tone-mark
                 ("꪿꫁" "w" ?4)         ; combining tone-mark
                 ((?ꫛ .
?꫟) "_" ?5)           ; symbol
                 )))
  (dolist (elm deflist)
    (let ((chars (car elm))
          (syntax (nth 1 elm))
          (category (nth 2 elm)))
      (if (consp chars)
          ;; A (FROM . TO) range is applied with a single call.
          (progn
            (modify-syntax-entry chars syntax)
            (modify-category-entry chars category))
        ;; Otherwise CHARS is a string: handle each character in turn.
        (mapc (lambda (x)
                (modify-syntax-entry x syntax)
                (modify-category-entry x category))
              chars)))))

;; Bidi categories

;; If bootstrapping without generated uni-*.el files, table not defined.
(let ((table (unicode-property-table-internal 'bidi-class)))
  (when table
    (map-char-table (lambda (key val)
                      (cond
                       ((memq val '(R AL RLO RLE))
                        (modify-category-entry key ?R))
                       ((memq val '(L LRE LRO))
                        (modify-category-entry key ?L))))
                    table)))

;; Load uni-mirrored.el and uni-brackets.el if available, so that they
;; get dumped into Emacs.  This allows starting Emacs with
;; force-load-messages in ~/.emacs, and avoid infinite recursion in
;; bidi_initialize, which needs to load uni-mirrored.el and
;; uni-brackets.el in order to display the "Loading" messages.
(unicode-property-table-internal 'mirroring)
(unicode-property-table-internal 'bracket-type)

;; Latin

(modify-category-entry '(#x80 . #x024F) ?l)

(let* ((tbl (standard-case-table))
       ;; Give every character from FROM to TO (inclusive) the syntax
       ;; SYNTAX by calling `set-case-syntax' on TBL.  Spelling out both
       ;; ends of every range avoids relying on a shared counter that
       ;; is carried over from one loop to the next.
       (set-syntax-range
        (lambda (from to syntax)
          (let ((ch from))
            (while (<= ch to)
              (set-case-syntax ch syntax tbl)
              (setq ch (1+ ch)))))))

  ;; Latin-1

  ;; Fixme: Some of the non-word syntaxes here perhaps should be
  ;; reviewed.  (Note that the following all implicitly have word
  ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.)  There should be a well-defined way of
  ;; relating Unicode categories to Emacs syntax codes.

  ;; FIXME: We should probably just use the Unicode properties to set
  ;; up the syntax table.

  (set-case-syntax ?¡ "." tbl)
  (set-case-syntax ?¦ "_" tbl)
  (set-case-syntax ?§ "." tbl)
  (set-case-syntax ?© "_" tbl)
  ;; French wants
  ;;   (set-case-syntax-delims ?« ?» tbl)
  ;; And German wants
  ;;   (set-case-syntax-delims ?» ?« tbl)
  ;; So let's stay neutral and let users set these up if/when they want to.
  (set-case-syntax ?« "." tbl)
  (set-case-syntax ?» "." tbl)
  (set-case-syntax ?¬ "_" tbl)
  ;; U+00AD SOFT HYPHEN; given as a number because the character itself
  ;; is invisible and easily lost.
  (set-case-syntax #xAD "_" tbl)
  (set-case-syntax ?® "_" tbl)
  (set-case-syntax ?° "_" tbl)
  (set-case-syntax ?± "_" tbl)
  (set-case-syntax ?µ "_" tbl)
  (set-case-syntax ?· "_" tbl)
  (set-case-syntax ?¼ "_" tbl)
  (set-case-syntax ?½ "_" tbl)
  (set-case-syntax ?¾ "_" tbl)
  (set-case-syntax ?¿ "." tbl)
  (set-case-syntax ?× "_" tbl)
  (set-case-syntax ?ß "w" tbl)
  (set-case-syntax ?÷ "_" tbl)
  ;; See below for ÿ.

  ;; Latin Extended-A, Latin Extended-B
  (modify-category-entry '(#x0100 . #x02B8) ?l)

  ;; Latin Extended Additional
  (modify-category-entry '(#x1E00 . #x1EF9) ?l)

  ;; Latin Extended-C
  (modify-category-entry '(#x2C60 . #x2C7F) ?l)

  ;; Latin Extended-D
  (modify-category-entry '(#xA720 . #xA7FF) ?l)

  ;; Latin Extended-E
  (modify-category-entry '(#xAB30 . #xAB64) ?l)

  ;; Latin Extended-G
  (modify-category-entry '(#x1DF00 . #x1DFFF) ?l)

  ;; Greek
  (modify-category-entry '(#x0370 . #x03FF) ?g)

  ;; Greek Extended
  (modify-category-entry '(#x1F00 . #x1FFF) ?g)

  ;; cyrillic
  (modify-category-entry '(#x0400 . #x04FF) ?y)
  (modify-category-entry '(#xA640 . #xA69F) ?y)

  ;; Cyrillic Extended-C
  (modify-category-entry '(#x1C80 . #x1C8F) ?y)

  ;; Nothing is set up here for Armenian or Georgian.  The assignments
  ;; that used to stand under those headings only stored a value that
  ;; was overwritten before it was ever read.

  ;; space characters (see section 6.2 in the Unicode Standard)
  ;; U+00A0 NO-BREAK SPACE is given as a number so that it cannot be
  ;; confused with (or converted into) an ordinary space.
  (set-case-syntax #xA0 " " tbl)
  (funcall set-syntax-range #x2000 #x200B " ")
  (dolist (ch '(#x202F #x205F #x3000))
    (set-case-syntax ch " " tbl))
  ;; general punctuation
  (funcall set-syntax-range #x200C #x200F ".")
  ;; Fixme: What to do with characters that have Pi and Pf
  ;; Unicode properties?
  (funcall set-syntax-range #x2010 #x2017 ".")
  ;; Punctuation syntax for quotation marks (like `)
  (funcall set-syntax-range #x2018 #x201F ".")
  (funcall set-syntax-range #x2020 #x2027 ".")
  (funcall set-syntax-range #x2030 #x205E ".")
  ;; These four lie inside the range just set and are switched to
  ;; symbol syntax, so this must come after it.
  (dolist (ch '(?‹ ?› ?⁄ ?⁒))
    (modify-syntax-entry ch "_"))

  ;; Arrows
  (funcall set-syntax-range #x2190 #x21FF "_")
  ;; Mathematical Operators
  (funcall set-syntax-range #x2200 #x22FF "_")
  ;; Miscellaneous Technical
  (funcall set-syntax-range #x2300 #x23FF "_")
  ;; Control Pictures
  (funcall set-syntax-range #x2400 #x244F "_")

  ;; Circled Latin (capitals, then the small letters 26 code points on)
  (modify-category-entry '(#x24B6 . #x24CF) ?l)
  (modify-category-entry '(#x24D0 . #x24E9) ?l)

  ;; Supplemental Mathematical Operators
  (funcall set-syntax-range #x2A00 #x2AFF "_")

  ;; Miscellaneous Symbols and Arrows
  (funcall set-syntax-range #x2B00 #x2BFF "_")

  ;; Coptic
  ;; There's no Coptic category.  However, Coptic letters that are
  ;; part of the Greek block above get the Greek category, and those
  ;; in this block are derived from Greek letters, so let's be
  ;; consistent about their category.
  (modify-category-entry '(#x2C80 . #x2CFF) ?g)

  ;; Supplemental Punctuation
  (funcall set-syntax-range #x2E00 #x2E7F ".")

  ;; Ideographic punctuation
  (funcall set-syntax-range #x3001 #x3003 ".")
  (set-case-syntax #x30FB "." tbl)

  ;; Symbols for Legacy Computing
  (funcall set-syntax-range #x1FB00 #x1FBCA "_")
  ;; FIXME: Should these be digits?
  (funcall set-syntax-range #x1FBCB #x1FBFF ".")

  ;; Fullwidth Latin
  (funcall set-syntax-range #xFF01 #xFF0F ".")
  (set-case-syntax #xFF04 "_" tbl)
  (set-case-syntax #xFF0B "_" tbl)
  (set-case-syntax #xFF1A "." tbl)
  (set-case-syntax #xFF1B "." tbl)
  (set-case-syntax #xFF1F "." tbl)
  (set-case-syntax #xFF20 "." tbl)
  ;; Capitals, then the small letters #x20 code points on.
  (modify-category-entry '(#xFF21 . #xFF3A) ?l)
  (modify-category-entry '(#xFF41 . #xFF5A) ?l)

  ;; Halfwidth Latin
  (funcall set-syntax-range #xFF64 #xFF65 ".")
  (set-case-syntax #xFF61 "." tbl)

  ;; Combining diacritics
  (modify-category-entry '(#x300 . #x362) ?^)
  ;; Combining marks
  (modify-category-entry '(#x20d0 . #x20ff) ?^)

  (let ((gc (unicode-property-table-internal 'general-category))
        (syn-table (standard-syntax-table)))
    ;; In early bootstrapping Unicode tables are not available so we need to
    ;; skip this step in those cases.
    (when gc
      ;; Set all Letter, uppercase; Letter, lowercase and Letter,
      ;; titlecase syntax to word.
      (map-char-table
       (lambda (ch cat)
         (when (memq cat '(Lu Ll Lt))
           (modify-syntax-entry ch "w " syn-table)))
       gc)
      ;; Ⅰ through Ⅻ had word syntax in the past so set it here as well.
      ;; The general category of those characters is Number, Letter.
      (modify-syntax-entry '(#x2160 . #x216b) "w " syn-table)

      ;; ⓐ through ⓩ are symbols, other according to Unicode but Emacs set
      ;; their syntax to word in the past so keep backwards compatibility.
      (modify-syntax-entry '(#x24D0 . #x24E9) "w " syn-table)

      ;; Set downcase and upcase from Unicode properties

      ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I and
      ;; U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so do U+0130
      ;; LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.

      ;; We used to set up half of those correspondence unconditionally, but
      ;; that makes searches slow.  So now we don't set up either half of these
      ;; correspondences by default.

      ;; (set-downcase-syntax  ?İ ?i tbl)
      ;; (set-upcase-syntax    ?I ?ı tbl)

      (let ((map-unicode-property
             ;; Call FUNC with (CHAR CASED) for every non-ASCII CHAR whose
             ;; value CASED in the Unicode property table PROPERTY is
             ;; itself non-ASCII.  Range keys are expanded to single
             ;; characters.
             (lambda (property func)
               (map-char-table
                (lambda (ch cased)
                  ;; ASCII characters skipped due to reasons outlined above.  As
                  ;; of Unicode 9.0, this exception affects the following:
                  ;;   lc(U+0130 İ) = i
                  ;;   uc(U+0131 ı) = I
                  ;;   uc(U+017F ſ) = S
                  ;;   lc(U+212A KELVIN SIGN) = k
                  (when (> cased 127)
                    (let ((end (if (consp ch) (cdr ch) ch)))
                      (setq ch (max 128 (if (consp ch) (car ch) ch)))
                      (while (<= ch end)
                        (funcall func ch cased)
                        (setq ch (1+ ch))))))
                (unicode-property-table-internal property))))
            (down tbl)
            (up (case-table-get-table tbl 'up)))

        ;; This works on an assumption that if toUpper(x) != x then toLower(x)
        ;; == x (and the opposite for toLower/toUpper).  This doesn’t hold for
        ;; title case characters but those incorrect mappings will be
        ;; overwritten later.
        (funcall map-unicode-property 'uppercase
                 (lambda (lc uc) (aset down lc lc) (aset up uc uc)))
        (funcall map-unicode-property 'lowercase
                 (lambda (uc lc) (aset down lc lc) (aset up uc uc)))

        ;; Now deal with the actual mapping.  This will correctly assign casing
        ;; for title-case characters.
        (funcall map-unicode-property 'uppercase
                 (lambda (lc uc) (aset up lc uc) (aset up uc uc)))
        (funcall map-unicode-property 'lowercase
                 (lambda (uc lc) (aset down uc lc) (aset down lc lc)))

        ;; Override the Unicode uppercase property for ß, since we are
        ;; using our case tables for determining the case of a
        ;; character (see uppercasep and lowercasep in buffer.h).
        ;; The special-uppercase property of ß ensures that it is
        ;; still upcased to SS per the usual convention.
        (aset up ?ß ?ẞ))))

  ;; Clear out the extra slots so that they will be recomputed from the main
  ;; (downcase) table and upcase table.  Since we’re side-stepping the usual
  ;; set-case-syntax-* functions, we need to do it explicitly.
  (set-char-table-extra-slot tbl 1 nil)
  (set-char-table-extra-slot tbl 2 nil)

  ;; Fixme: syntax for symbols &c
  )

;; Each string holds an opening bracket followed by its closing
;; counterpart.  The opener gets open-parenthesis syntax matching the
;; closer, and vice versa.  Pairs whose characters are changed by
;; Unicode or width normalization (or are right-to-left) are written as
;; escapes so that they always denote the code points in the comment.
(let ((pairs
       '("⁅⁆"                   ; U+2045 U+2046
         "⁽⁾"                   ; U+207D U+207E
         "₍₎"                   ; U+208D U+208E
         "\u2329\u232A"         ; U+2329 U+232A
         "⎴⎵"                   ; U+23B4 U+23B5
         "❨❩"                   ; U+2768 U+2769
         "❪❫"                   ; U+276A U+276B
         "❬❭"                   ; U+276C U+276D
         "❰❱"                   ; U+2770 U+2771
         "❲❳"                   ; U+2772 U+2773
         "❴❵"                   ; U+2774 U+2775
         "⟦⟧"                   ; U+27E6 U+27E7
         "⟨⟩"                   ; U+27E8 U+27E9
         "⟪⟫"                   ; U+27EA U+27EB
         "⦃⦄"                   ; U+2983 U+2984
         "⦅⦆"                   ; U+2985 U+2986
         "⦇⦈"                   ; U+2987 U+2988
         "⦉⦊"                   ; U+2989 U+298A
         "⦋⦌"                   ; U+298B U+298C
         "⦍⦎"                   ; U+298D U+298E
         "⦏⦐"                   ; U+298F U+2990
         "⦑⦒"                   ; U+2991 U+2992
         "⦓⦔"                   ; U+2993 U+2994
         "⦕⦖"                   ; U+2995 U+2996
         "⦗⦘"                   ; U+2997 U+2998
         "⧼⧽"                   ; U+29FC U+29FD
         "〈〉"                 ; U+3008 U+3009
         "《》"                 ; U+300A U+300B
         "「」"                 ; U+300C U+300D
         "『』"                 ; U+300E U+300F
         "【】"                 ; U+3010 U+3011
         "〔〕"                 ; U+3014 U+3015
         "〖〗"                 ; U+3016 U+3017
         "〘〙"                 ; U+3018 U+3019
         "〚〛"                 ; U+301A U+301B
         "\uFD3E\uFD3F"         ; U+FD3E U+FD3F
         "︵︶"                 ; U+FE35 U+FE36
         "︷︸"                 ; U+FE37 U+FE38
         "︹︺"                 ; U+FE39 U+FE3A
         "︻︼"                 ; U+FE3B U+FE3C
         "︽︾"                 ; U+FE3D U+FE3E
         "︿﹀"                 ; U+FE3F U+FE40
         "﹁﹂"                 ; U+FE41 U+FE42
         "﹃﹄"                 ; U+FE43 U+FE44
         "﹙﹚"                 ; U+FE59 U+FE5A
         "﹛﹜"                 ; U+FE5B U+FE5C
         "﹝﹞"                 ; U+FE5D U+FE5E
         "\uFF08\uFF09"         ; U+FF08 U+FF09
         "\uFF3B\uFF3D"         ; U+FF3B U+FF3D
         "\uFF5B\uFF5D"         ; U+FF5B U+FF5D
         "\uFF5F\uFF60"         ; U+FF5F U+FF60
         "\uFF62\uFF63"         ; U+FF62 U+FF63
         )))
  (dolist (elt pairs)
    (modify-syntax-entry (aref elt 0) (string ?\( (aref elt 1)))
    (modify-syntax-entry (aref elt 1) (string ?\) (aref elt 0)))))


;; For each character set, put the information of the most proper
;; coding system to encode it by `preferred-coding-system' property.

;; Fixme: should this be junked?
;; Each element is (CHARSET . CODING-SYSTEM).
(dolist (entry '((latin-iso8859-1 . iso-latin-1)
                 (latin-iso8859-2 . iso-latin-2)
                 (latin-iso8859-3 . iso-latin-3)
                 (latin-iso8859-4 . iso-latin-4)
                 (thai-tis620 . thai-tis620)
                 (greek-iso8859-7 . greek-iso-8bit)
                 (arabic-iso8859-6 . iso-2022-7bit)
                 (hebrew-iso8859-8 . hebrew-iso-8bit)
                 (katakana-jisx0201 . japanese-shift-jis)
                 (latin-jisx0201 . japanese-shift-jis)
                 (cyrillic-iso8859-5 . cyrillic-iso-8bit)
                 (latin-iso8859-9 . iso-latin-5)
                 (japanese-jisx0208-1978 . iso-2022-jp)
                 (chinese-gb2312 . chinese-iso-8bit)
                 (chinese-gbk . chinese-gbk)
                 (gb18030-2-byte . chinese-gb18030)
                 (gb18030-4-byte-bmp . chinese-gb18030)
                 (gb18030-4-byte-smp . chinese-gb18030)
                 (gb18030-4-byte-ext-1 . chinese-gb18030)
                 (gb18030-4-byte-ext-2 . chinese-gb18030)
                 (japanese-jisx0208 . iso-2022-jp)
                 (korean-ksc5601 . iso-2022-kr)
                 (japanese-jisx0212 . iso-2022-jp)
                 (chinese-big5-1 . chinese-big5)
                 (chinese-big5-2 . chinese-big5)
                 (chinese-sisheng . iso-2022-7bit)
                 (ipa . iso-2022-7bit)
                 (vietnamese-viscii-lower . vietnamese-viscii)
                 (vietnamese-viscii-upper . vietnamese-viscii)
                 (arabic-digit . iso-2022-7bit)
                 (arabic-1-column . iso-2022-7bit)
                 (lao . lao)
                 (arabic-2-column . iso-2022-7bit)
                 (indian-is13194 . devanagari)
                 (indian-glyph . devanagari)
                 (tibetan-1-column . tibetan)
                 (ethiopic . iso-2022-7bit)
                 (chinese-cns11643-1 . iso-2022-cn)
                 (chinese-cns11643-2 . iso-2022-cn)
                 (chinese-cns11643-3 . iso-2022-cn)
                 (chinese-cns11643-4 . iso-2022-cn)
                 (chinese-cns11643-5 . iso-2022-cn)
                 (chinese-cns11643-6 . iso-2022-cn)
                 (chinese-cns11643-7 . iso-2022-cn)
                 (indian-2-column . devanagari)
                 (tibetan . tibetan)
                 (latin-iso8859-14 . iso-latin-8)
                 (latin-iso8859-15 . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))


;; Setup auto-fill-chars for charsets that should invoke auto-filling.
;; SPACE and NEWLINE are already set.

(set-char-table-range auto-fill-chars '(#x3041 .
#x30FF) t) 954(set-char-table-range auto-fill-chars '(#x3400 . #x4DB5) t) 955(set-char-table-range auto-fill-chars '(#x4e00 . #x9fbb) t) 956(set-char-table-range auto-fill-chars '(#xF900 . #xFAFF) t) 957(set-char-table-range auto-fill-chars '(#xFF00 . #xFF9F) t) 958(set-char-table-range auto-fill-chars '(#x20000 . #x2FFFF) t) 959 960 961;;; Setting char-width-table. The default is 1. 962 963;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial 964;; and final characters. 965(let ((l '((#x0300 . #x036F) 966 (#x0483 . #x0489) 967 (#x0591 . #x05BD) 968 (#x05BF . #x05BF) 969 (#x05C1 . #x05C2) 970 (#x05C4 . #x05C5) 971 (#x05C7 . #x05C7) 972 (#x0600 . #x0605) 973 (#x0610 . #x061C) 974 (#x064B . #x065F) 975 (#x0670 . #x0670) 976 (#x06D6 . #x06E4) 977 (#x06E7 . #x06E8) 978 (#x06EA . #x06ED) 979 (#x070F . #x070F) 980 (#x0711 . #x0711) 981 (#x0730 . #x074A) 982 (#x07A6 . #x07B0) 983 (#x07EB . #x07F3) 984 (#x0816 . #x0823) 985 (#x0825 . #x082D) 986 (#x0859 . #x085B) 987 (#x08D4 . #x0902) 988 (#x093A . #x093A) 989 (#x093C . #x093C) 990 (#x0941 . #x0948) 991 (#x094D . #x094D) 992 (#x0951 . #x0957) 993 (#x0962 . #x0963) 994 (#x0981 . #x0981) 995 (#x09BC . #x09BC) 996 (#x09C1 . #x09C4) 997 (#x09CD . #x09CD) 998 (#x09E2 . #x09E3) 999 (#x0A01 . #x0A02) 1000 (#x0A3C . #x0A3C) 1001 (#x0A41 . #x0A4D) 1002 (#x0A41 . #x0A42) 1003 (#x0A47 . #x0A48) 1004 (#x0A4B . #x0A4D) 1005 (#x0A51 . #x0A51) 1006 (#x0A70 . #x0A71) 1007 (#x0A75 . #x0A75) 1008 (#x0A81 . #x0A82) 1009 (#x0ABC . #x0ABC) 1010 (#x0AC1 . #x0AC8) 1011 (#x0ACD . #x0ACD) 1012 (#x0AE2 . #x0AE3) 1013 (#x0B01 . #x0B01) 1014 (#x0B3C . #x0B3C) 1015 (#x0B3F . #x0B3F) 1016 (#x0B41 . #x0B44) 1017 (#x0B4D . #x0B56) 1018 (#x0B62 . #x0B63) 1019 (#x0B82 . #x0B82) 1020 (#x0BC0 . #x0BC0) 1021 (#x0BCD . #x0BCD) 1022 (#x0C00 . #x0C00) 1023 (#x0C3E . #x0C40) 1024 (#x0C46 . #x0C56) 1025 (#x0C62 . #x0C63) 1026 (#x0C81 . #x0C81) 1027 (#x0CBC . #x0CBC) 1028 (#x0CCC . #x0CCD) 1029 (#x0CE2 . #x0CE3) 1030 (#x0D01 . 
#x0D01) 1031 (#x0D41 . #x0D44) 1032 (#x0D4D . #x0D4D) 1033 (#x0D62 . #x0D63) 1034 (#x0D81 . #x0D81) 1035 (#x0DCA . #x0DCA) 1036 (#x0DD2 . #x0DD6) 1037 (#x0E31 . #x0E31) 1038 (#x0E34 . #x0E3A) 1039 (#x0E47 . #x0E4E) 1040 (#x0EB1 . #x0EB1) 1041 (#x0EB4 . #x0EBC) 1042 (#x0EC8 . #x0ECD) 1043 (#x0F18 . #x0F19) 1044 (#x0F35 . #x0F35) 1045 (#x0F37 . #x0F37) 1046 (#x0F39 . #x0F39) 1047 (#x0F71 . #x0F7E) 1048 (#x0F80 . #x0F84) 1049 (#x0F86 . #x0F87) 1050 (#x0F8D . #x0FBC) 1051 (#x0FC6 . #x0FC6) 1052 (#x102D . #x1030) 1053 (#x1032 . #x1037) 1054 (#x1039 . #x103A) 1055 (#x103D . #x103E) 1056 (#x1058 . #x1059) 1057 (#x105E . #x1060) 1058 (#x1071 . #x1074) 1059 (#x1082 . #x1082) 1060 (#x1085 . #x1086) 1061 (#x108D . #x108D) 1062 (#x109D . #x109D) 1063 (#x1160 . #x11FF) 1064 (#x135D . #x135F) 1065 (#x1712 . #x1714) 1066 (#x1732 . #x1734) 1067 (#x1752 . #x1753) 1068 (#x1772 . #x1773) 1069 (#x17B4 . #x17B5) 1070 (#x17B7 . #x17BD) 1071 (#x17C6 . #x17C6) 1072 (#x17C9 . #x17D3) 1073 (#x17DD . #x17DD) 1074 (#x180B . #x180E) 1075 (#x18A9 . #x18A9) 1076 (#x1885 . #x1886) 1077 (#x18A9 . #x18A9) 1078 (#x1920 . #x1922) 1079 (#x1927 . #x1928) 1080 (#x1932 . #x1932) 1081 (#x1939 . #x193B) 1082 (#x1A17 . #x1A18) 1083 (#x1A1B . #x1A1B) 1084 (#x1A56 . #x1A56) 1085 (#x1A58 . #x1A5E) 1086 (#x1A60 . #x1A60) 1087 (#x1A62 . #x1A62) 1088 (#x1A65 . #x1A6C) 1089 (#x1A73 . #x1A7C) 1090 (#x1A7F . #x1A7F) 1091 (#x1AB0 . #x1AC0) 1092 (#x1B00 . #x1B03) 1093 (#x1B34 . #x1B34) 1094 (#x1B36 . #x1B3A) 1095 (#x1B3C . #x1B3C) 1096 (#x1B42 . #x1B42) 1097 (#x1B6B . #x1B73) 1098 (#x1B80 . #x1B81) 1099 (#x1BA2 . #x1BA5) 1100 (#x1BA8 . #x1BA9) 1101 (#x1BAB . #x1BAD) 1102 (#x1BE6 . #x1BE6) 1103 (#x1BE8 . #x1BE9) 1104 (#x1BED . #x1BED) 1105 (#x1BEF . #x1BF1) 1106 (#x1C2C . #x1C33) 1107 (#x1C36 . #x1C37) 1108 (#x1CD0 . #x1CD2) 1109 (#x1CD4 . #x1CE0) 1110 (#x1CE2 . #x1CE8) 1111 (#x1CED . #x1CED) 1112 (#x1CF4 . #x1CF4) 1113 (#x1CF8 . #x1CF9) 1114 (#x1DC0 . #x1DFF) 1115 (#x200B . #x200F) 1116 (#x202A . 
#x202E) 1117 (#x2060 . #x206F) 1118 (#x20D0 . #x20F0) 1119 (#x2CEF . #x2CF1) 1120 (#x2D7F . #x2D7F) 1121 (#x2DE0 . #x2DFF) 1122 (#xA66F . #xA672) 1123 (#xA674 . #xA69F) 1124 (#xA6F0 . #xA6F1) 1125 (#xA802 . #xA802) 1126 (#xA806 . #xA806) 1127 (#xA80B . #xA80B) 1128 (#xA825 . #xA826) 1129 (#xA82C . #xA82C) 1130 (#xA8C4 . #xA8C5) 1131 (#xA8E0 . #xA8F1) 1132 (#xA926 . #xA92D) 1133 (#xA947 . #xA951) 1134 (#xA980 . #xA9B3) 1135 (#xA9B6 . #xA9B9) 1136 (#xA9BC . #xA9BC) 1137 (#xA9E5 . #xA9E5) 1138 (#xAA29 . #xAA2E) 1139 (#xAA31 . #xAA32) 1140 (#xAA35 . #xAA36) 1141 (#xAA43 . #xAA43) 1142 (#xAA4C . #xAA4C) 1143 (#xAA7C . #xAA7C) 1144 (#xAAB0 . #xAAB0) 1145 (#xAAB2 . #xAAB4) 1146 (#xAAB7 . #xAAB8) 1147 (#xAABE . #xAABF) 1148 (#xAAC1 . #xAAC1) 1149 (#xAAEC . #xAAED) 1150 (#xAAF6 . #xAAF6) 1151 (#xABE5 . #xABE5) 1152 (#xABE8 . #xABE8) 1153 (#xABED . #xABED) 1154 (#xD7B0 . #xD7FB) 1155 (#xFB1E . #xFB1E) 1156 (#xFE00 . #xFE0F) 1157 (#xFE20 . #xFE2F) 1158 (#xFEFF . #xFEFF) 1159 (#xFFF9 . #xFFFB) 1160 (#x101FD . #x101FD) 1161 (#x102E0 . #x102E0) 1162 (#x10376 . #x1037A) 1163 (#x10A01 . #x10A0F) 1164 (#x10A38 . #x10A3F) 1165 (#x10AE5 . #x10AE6) 1166 (#x10EAB . #x10EAC) 1167 (#x11001 . #x11001) 1168 (#x11038 . #x11046) 1169 (#x1107F . #x11081) 1170 (#x110B3 . #x110B6) 1171 (#x110B9 . #x110BA) 1172 (#x110BD . #x110BD) 1173 (#x11100 . #x11102) 1174 (#x11127 . #x1112B) 1175 (#x1112D . #x11134) 1176 (#x11173 . #x11173) 1177 (#x11180 . #x11181) 1178 (#x111B6 . #x111BE) 1179 (#x111CA . #x111CC) 1180 (#x111CF . #x111CF) 1181 (#x1122F . #x11231) 1182 (#x11234 . #x11234) 1183 (#x11236 . #x11237) 1184 (#x1123E . #x1123E) 1185 (#x112DF . #x112DF) 1186 (#x112E3 . #x112EA) 1187 (#x11300 . #x11301) 1188 (#x1133C . #x1133C) 1189 (#x11340 . #x11340) 1190 (#x11366 . #x1136C) 1191 (#x11370 . #x11374) 1192 (#x11438 . #x1143F) 1193 (#x11442 . #x11444) 1194 (#x11446 . #x11446) 1195 (#x114B3 . #x114B8) 1196 (#x114BA . #x114C0) 1197 (#x114C2 . #x114C3) 1198 (#x115B2 . #x115B5) 1199 (#x115BC . 
#x115BD) 1200 (#x115BF . #x115C0) 1201 (#x115DC . #x115DD) 1202 (#x11633 . #x1163A) 1203 (#x1163D . #x1163D) 1204 (#x1163F . #x11640) 1205 (#x116AB . #x116AB) 1206 (#x116AD . #x116AD) 1207 (#x116B0 . #x116B5) 1208 (#x116B7 . #x116B7) 1209 (#x1171D . #x1171F) 1210 (#x11722 . #x11725) 1211 (#x11727 . #x1172B) 1212 (#x1193B . #x1193C) 1213 (#x1193E . #x1193E) 1214 (#x11943 . #x11943) 1215 (#x11C30 . #x11C36) 1216 (#x11C38 . #x11C3D) 1217 (#x11C92 . #x11CA7) 1218 (#x11CAA . #x11CB0) 1219 (#x11CB2 . #x11CB3) 1220 (#x11CB5 . #x11CB6) 1221 (#x16AF0 . #x16AF4) 1222 (#x16B30 . #x16B36) 1223 (#x16F8F . #x16F92) 1224 (#x16FE4 . #x16FE4) 1225 (#x1BC9D . #x1BC9E) 1226 (#x1BCA0 . #x1BCA3) 1227 (#x1D167 . #x1D169) 1228 (#x1D173 . #x1D182) 1229 (#x1D185 . #x1D18B) 1230 (#x1D1AA . #x1D1AD) 1231 (#x1D242 . #x1D244) 1232 (#x1DA00 . #x1DA36) 1233 (#x1DA3B . #x1DA6C) 1234 (#x1DA75 . #x1DA75) 1235 (#x1DA84 . #x1DA84) 1236 (#x1DA9B . #x1DA9F) 1237 (#x1DAA1 . #x1DAAF) 1238 (#x1E000 . #x1E006) 1239 (#x1E008 . #x1E018) 1240 (#x1E01B . #x1E021) 1241 (#x1E023 . #x1E024) 1242 (#x1E026 . #x1E02A) 1243 (#x1E8D0 . #x1E8D6) 1244 (#x1E944 . #x1E94A) 1245 (#xE0001 . #xE01EF)))) 1246 (dolist (elt l) 1247 (set-char-table-range char-width-table elt 0))) 1248 1249;; 2: East Asian Wide and Full-width characters. 1250(let ((l '((#x1100 . #x115F) 1251 (#x231A . #x231B) 1252 (#x2329 . #x232A) 1253 (#x23E9 . #x23EC) 1254 (#x23F0 . #x23F0) 1255 (#x23F3 . #x23F3) 1256 (#x25FD . #x25FE) 1257 (#x2614 . #x2615) 1258 (#x2648 . #x2653) 1259 (#x267F . #x267F) 1260 (#x2693 . #x2693) 1261 (#x26A1 . #x26A1) 1262 (#x26AA . #x26AB) 1263 (#x26BD . #x26BE) 1264 (#x26C4 . #x26C5) 1265 (#x26CE . #x26CE) 1266 (#x26D4 . #x26D4) 1267 (#x26EA . #x26EA) 1268 (#x26F2 . #x26F3) 1269 (#x26F5 . #x26F5) 1270 (#x26FA . #x26FA) 1271 (#x26FD . #x26FD) 1272 (#x2705 . #x2705) 1273 (#x270A . #x270B) 1274 (#x2728 . #x2728) 1275 (#x274C . #x274C) 1276 (#x274E . #x274E) 1277 (#x2753 . #x2755) 1278 (#x2757 . #x2757) 1279 (#x2795 . 
#x2797) 1280 (#x27B0 . #x27B0) 1281 (#x27BF . #x27BF) 1282 (#x2B1B . #x2B1C) 1283 (#x2B50 . #x2B50) 1284 (#x2B55 . #x2B55) 1285 (#x2E80 . #x303E) 1286 (#x3040 . #x3247) 1287 (#x3250 . #x4DBF) 1288 (#x4E00 . #x9FFF) 1289 (#xA490 . #xA4C6) 1290 (#xA960 . #xA97F) 1291 (#xAC00 . #xD7A3) 1292 (#xF900 . #xFAFF) 1293 (#xFE10 . #xFE19) 1294 (#xFE30 . #xFE6F) 1295 (#xFF01 . #xFF60) 1296 (#xFFE0 . #xFFE6) 1297 (#x16FE0 . #x16FE4) 1298 (#x16FF0 . #x16FF1) 1299 (#x17000 . #x187F7) 1300 (#x18800 . #x18AFF) 1301 (#x18B00 . #x18CD5) 1302 (#x1AFF0 . #x1AFFF) 1303 (#x1B000 . #x1B152) 1304 (#x1B164 . #x1B167) 1305 (#x1B170 . #x1B2FB) 1306 (#x1F004 . #x1F004) 1307 (#x1F0CF . #x1F0CF) 1308 (#x1F18E . #x1F18E) 1309 (#x1F191 . #x1F19A) 1310 (#x1F1AD . #x1F1AD) 1311 (#x1F200 . #x1F320) 1312 (#x1F32D . #x1F335) 1313 (#x1F337 . #x1F37C) 1314 (#x1F37E . #x1F393) 1315 (#x1F3A0 . #x1F3CA) 1316 (#x1F3CF . #x1F3D3) 1317 (#x1F3E0 . #x1F3F0) 1318 (#x1F3F4 . #x1F3F4) 1319 (#x1F3F8 . #x1F3FA) 1320 (#x1F3FB . #x1F3FF) 1321 (#x1F400 . #x1F43E) 1322 (#x1F440 . #x1F440) 1323 (#x1F442 . #x1F4FC) 1324 (#x1F4FF . #x1F53D) 1325 (#x1F54B . #x1F54E) 1326 (#x1F550 . #x1F567) 1327 (#x1F57A . #x1F57A) 1328 (#x1F595 . #x1F596) 1329 (#x1F5A4 . #x1F5A4) 1330 (#x1F5FB . #x1F5FF) 1331 (#x1F600 . #x1F64F) 1332 (#x1F680 . #x1F6C5) 1333 (#x1F6CC . #x1F6CC) 1334 (#x1F6D0 . #x1F6D2) 1335 (#x1F6D5 . #x1F6D7) 1336 (#x1F6DD . #x1F6DF) 1337 (#x1F6EB . #x1F6EC) 1338 (#x1F6F4 . #x1F6FC) 1339 (#x1F7E0 . #x1F7F0) 1340 (#x1F90C . #x1F93A) 1341 (#x1F93C . #x1F945) 1342 (#x1F947 . #x1F9FF) 1343 (#x1FA00 . #x1FA53) 1344 (#x1FA60 . #x1FA6D) 1345 (#x1FA70 . #x1FA74) 1346 (#x1FA78 . #x1FA7C) 1347 (#x1FA80 . #x1FA86) 1348 (#x1FA90 . #x1FAAC) 1349 (#x1FAB0 . #x1FABA) 1350 (#x1FAC0 . #x1FAC5) 1351 (#x1FAD0 . #x1FAD9) 1352 (#x1FAE0 . #x1FAE7) 1353 (#x1FAF0 . #x1FAF6) 1354 (#x1FB00 . #x1FB92) 1355 (#x20000 . #x2FFFF) 1356 (#x30000 . 
#x3FFFF)))) 1357 (dolist (elt l) 1358 (set-char-table-range char-width-table elt 2))) 1359 1360;; Other double width 1361;;(map-charset-chars 1362;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) 1363;; 'ethiopic) 1364;; (map-charset-chars 1365;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) 1366;; 'tibetan) 1367(map-charset-chars 1368 (lambda (range _ignore) (set-char-table-range char-width-table range 2)) 1369 'indian-2-column) 1370(map-charset-chars 1371 (lambda (range _ignore) (set-char-table-range char-width-table range 2)) 1372 'arabic-2-column) 1373 1374;; Internal use only. 1375;; Alist of locale symbol vs charsets. In a language environment 1376;; corresponding to the locale, width of characters in the charsets is 1377;; set to 2. Each element has the form: 1378;; (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...) 1379;; LOCALE: locale symbol 1380;; TABLE: char-table used for char-width-table, initially nil. 1381;; CHARSET: character set 1382;; FROM-CODE, TO-CODE: range of code-points in CHARSET 1383 1384(defvar cjk-char-width-table-list 1385 '((ja_JP nil (japanese-jisx0208 (#x2121 . #x287E)) 1386 (cp932-2-byte (#x8140 . #x879F))) 1387 (zh_CN nil (chinese-gb2312 (#x2121 . #x297E))) 1388 (zh_HK nil (big5-hkscs (#xA140 . #xA3FE) (#xC6A0 . #xC8FE))) 1389 (zh_TW nil (big5 (#xA140 . #xA3FE)) 1390 (chinese-cns11643-1 (#x2121 . #x427E))) 1391 (ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E))))) 1392 1393;; Internal use only. 1394;; Setup char-width-table appropriate for a language environment 1395;; corresponding to LOCALE-NAME (symbol). 

(defun use-cjk-char-width-table (locale-name)
  "Internal use only.
Make `char-width-table' the CJK-specific table for LOCALE-NAME (a symbol).
The table is looked up in `cjk-char-width-table-list'; it is built on
first use, cached in that list, and has the plain (parent-less)
`char-width-table' as its parent.  Signal an error if LOCALE-NAME has
no entry in the list."
  ;; Drop any CJK table installed earlier, so that the new table
  ;; inherits from the root table.
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent)))
  (let ((entry (assq locale-name cjk-char-width-table-list)))
    (unless entry
      (error "Unknown locale for CJK language environment: %s"
             locale-name))
    ;; ENTRY is (LOCALE TABLE CHARSET-SPEC...); TABLE is nil until the
    ;; first request for this locale.
    (when (null (cadr entry))
      (let ((cjk-table (make-char-table nil)))
        (dolist (spec (cddr entry))
          ;; SPEC is (CHARSET (FROM-CODE . TO-CODE) ...).
          (let ((charset (car spec)))
            (dolist (codes (cdr spec))
              (map-charset-chars
               (lambda (range _arg)
                 (set-char-table-range cjk-table range 2))
               charset nil
               (car codes) (cdr codes)))))
        (optimize-char-table cjk-table)
        (set-char-table-parent cjk-table char-width-table)
        (setcar (cdr entry) cjk-table)))
    (setq char-width-table (cadr entry))))

(defun use-default-char-width-table ()
  "Internal use only.
Make `char-width-table' suitable for a non-CJK language environment,
by replacing it with its parent-less ancestor."
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent))))

(dolist (table (list (standard-case-table) (standard-syntax-table)))
  (optimize-char-table table))


;; Setting char-script-table.
(cond (dump-mode
       ;; While dumping, we can't use require, and international is not
       ;; in load-path.
       (load "international/charscript")
       (load "international/emoji-zwj"))
      (t
       (require 'charscript)
       (require 'emoji-zwj)))

(map-charset-chars
 (lambda (chars _ignore)
   (set-char-table-range char-script-table chars 'tibetan))
 'tibetan)


;;; Setting unicode-category-table.

;; Derive category membership from the Unicode General Category: every
;; character whose category does not start with `M' or `C' gets
;; category `.', and every `Mn' character gets category `^'.
(when (setq unicode-category-table
            (unicode-property-table-internal 'general-category))
  (map-char-table (lambda (key val)
                    (if val
                        (cond ((or (and (/= (aref (symbol-name val) 0) ?M)
                                        (/= (aref (symbol-name val) 0) ?C))
                                   (eq val 'Zs))
                               (modify-category-entry key ?.))

                              ((eq val 'Mn)
                               (modify-category-entry key ?^)))))
                  unicode-category-table))

(optimize-char-table (standard-category-table))


;; Display of glyphless characters.

(defvar char-acronym-table
  (make-char-table 'char-acronym-table nil)
  "Char table of acronyms for non-graphic characters.")

;; C0 controls, U+0000..U+001F.  TAB and LF have no acronym.
(let ((c0-acronyms '("NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL"
                     "BS"   nil   nil  "VT"  "FF"  "CR"  "SO"  "SI"
                     "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
                     "CAN" "EM"  "SUB" "ESC" "FS"  "GS"  "RS"  "US")))
  (dotimes (i 32)
    (aset char-acronym-table i (car c0-acronyms))
    (setq c0-acronyms (cdr c0-acronyms))))

;; C1 controls, U+0080..U+009F.
(let ((c1-acronyms '("PAD" "HOP" "BPH" "NBH" "IND" "NEL" "SSA" "ESA"
                     "HTS" "HTJ" "VTS" "PLD" "PLU" "RI"  "SS2" "SS3"
                     "DCS" "PU1" "PU2" "STS" "CCH" "MW"  "SPA" "EPA"
                     "SOS" "SGCI" "SCI" "CSI" "ST" "OSC" "PM"  "APC")))
  (dotimes (i 32)
    (aset char-acronym-table (+ #x0080 i) (car c1-acronyms))
    (setq c1-acronyms (cdr c1-acronyms))))

(aset char-acronym-table #x17B4 "KIVAQ")   ; KHMER VOWEL INHERENT AQ
(aset char-acronym-table #x17B5 "KIVAA")   ; KHMER VOWEL INHERENT AA
(aset char-acronym-table #x200B "ZWSP")    ; ZERO WIDTH SPACE
(aset char-acronym-table #x200C "ZWNJ")    ; ZERO WIDTH NON-JOINER
(aset char-acronym-table #x200D "ZWJ")     ; ZERO WIDTH JOINER
(aset char-acronym-table #x200E "LRM")     ; LEFT-TO-RIGHT MARK
(aset char-acronym-table #x200F "RLM")     ; RIGHT-TO-LEFT MARK
(aset char-acronym-table #x202A "LRE")     ; LEFT-TO-RIGHT EMBEDDING
(aset char-acronym-table #x202B "RLE")     ; RIGHT-TO-LEFT EMBEDDING
(aset char-acronym-table #x202C "PDF")     ; POP DIRECTIONAL FORMATTING
(aset char-acronym-table #x202D "LRO")     ; LEFT-TO-RIGHT OVERRIDE
(aset char-acronym-table #x202E "RLO")     ; RIGHT-TO-LEFT OVERRIDE
(aset char-acronym-table #x2060 "WJ")      ; WORD JOINER
(aset char-acronym-table #x2066 "LRI")     ; LEFT-TO-RIGHT ISOLATE
(aset char-acronym-table #x2067 "RLI")     ; RIGHT-TO-LEFT ISOLATE
(aset char-acronym-table #x2068 "FSI")     ; FIRST STRONG ISOLATE
(aset char-acronym-table #x2069 "PDI")     ; POP DIRECTIONAL ISOLATE
(aset char-acronym-table #x206A "ISS")     ; INHIBIT SYMMETRIC SWAPPING
(aset char-acronym-table #x206B "ASS")     ; ACTIVATE SYMMETRIC SWAPPING
(aset char-acronym-table #x206C "IAFS")    ; INHIBIT ARABIC FORM SHAPING
(aset char-acronym-table #x206D "AAFS")    ; ACTIVATE ARABIC FORM SHAPING
(aset char-acronym-table #x206E "NADS")    ; NATIONAL DIGIT SHAPES
(aset char-acronym-table #x206F "NODS")    ; NOMINAL DIGIT SHAPES
;; Variation selectors 1..16, so that the `acronym' method has
;; something to show for the `variation-selectors' group.
(dotimes (i 16)
  (aset char-acronym-table (+ #xFE00 i) (format "VS%d" (1+ i))))
(aset char-acronym-table #xFEFF "ZWNBSP")  ; ZERO WIDTH NO-BREAK SPACE
(aset char-acronym-table #xFFF9 "IAA")     ; INTERLINEAR ANNOTATION ANCHOR
(aset char-acronym-table #xFFFA "IAS")     ; INTERLINEAR ANNOTATION SEPARATOR
(aset char-acronym-table #xFFFB "IAT")     ; INTERLINEAR ANNOTATION TERMINATOR
(aset char-acronym-table #x1D173 "BEGBM")  ; MUSICAL SYMBOL BEGIN BEAM
(aset char-acronym-table #x1D174 "ENDBM")  ; MUSICAL SYMBOL END BEAM
(aset char-acronym-table #x1D175 "BEGTIE") ; MUSICAL SYMBOL BEGIN TIE
(aset char-acronym-table #x1D176 "ENDTIE") ; MUSICAL SYMBOL END TIE
(aset char-acronym-table #x1D177 "BEGSLR") ; MUSICAL SYMBOL BEGIN SLUR
(aset char-acronym-table #x1D178 "ENDSLR") ; MUSICAL SYMBOL END SLUR
(aset char-acronym-table #x1D179 "BEGPHR") ; MUSICAL SYMBOL BEGIN PHRASE
(aset char-acronym-table #x1D17A "ENDPHR") ; MUSICAL SYMBOL END PHRASE
(aset char-acronym-table #xE0001 "|->TAG") ; LANGUAGE TAG
(aset char-acronym-table #xE0020 "SP TAG") ; TAG SPACE
;; TAG characters U+E0021..U+E007E mirror ASCII 33..126.
(dotimes (i 94)
  (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
(aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG

;; We can't use the \N{name} things here, because this file is used
;; too early in the build process.
(defvar glyphless--bidi-control-characters
  '(#x202a     ; ?\N{left-to-right embedding}
    #x202b     ; ?\N{right-to-left embedding}
    #x202d     ; ?\N{left-to-right override}
    #x202e     ; ?\N{right-to-left override}
    #x2066     ; ?\N{left-to-right isolate}
    #x2067     ; ?\N{right-to-left isolate}
    #x2068     ; ?\N{first strong isolate}
    #x202c     ; ?\N{pop directional formatting}
    #x2069))   ; ?\N{pop directional isolate}

(defun update-glyphless-char-display (&optional variable value)
  "Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display',
and is intended to be used in the `:set' attribute of the
option `glyphless-char-display-control'.

If VARIABLE is non-nil, set its default value to VALUE first.
VALUE is an alist of (GROUP . METHOD) directives; see
`glyphless-char-display-control' for their meaning.  Signal an
error if a METHOD or a GROUP is not a recognized symbol."
  (when variable
    (set-default variable value))
  (dolist (elt value)
    (let ((target (car elt))
          (method (cdr elt)))
      ;; NOTE(review): `bidi-control' is a group name, not a display
      ;; method, yet it is accepted here; kept for compatibility.
      (unless (memq method '( zero-width thin-space empty-box
                              acronym hex-code bidi-control))
        (error "Invalid glyphless character display method: %s" method))
      (cond ((eq target 'c0-control)
             (glyphless-set-char-table-range glyphless-char-display
                                             #x00 #x1F method)
             ;; Users will not expect their newlines and TABs be
             ;; displayed as anything but themselves, so exempt those
             ;; two characters from c0-control.
             (set-char-table-range glyphless-char-display #x9 nil)
             (set-char-table-range glyphless-char-display #xa nil))
            ((eq target 'c1-control)
             (glyphless-set-char-table-range glyphless-char-display
                                             #x80 #x9F method))
            ((eq target 'variation-selectors)
             (glyphless-set-char-table-range glyphless-char-display
                                             #xFE00 #xFE0F method))
            ((or (eq target 'format-control)
                 (eq target 'bidi-control))
             (when unicode-category-table
               (map-char-table
                (lambda (char category)
                  (when (eq category 'Cf)
                    (let ((this-method method)
                          from to)
                      ;; CHAR is either a single character or a
                      ;; (FROM . TO) range.
                      (if (consp char)
                          (setq from (car char) to (cdr char))
                        (setq from char to char))
                      (while (<= from to)
                        ;; U+00AD (SHY) is never treated as glyphless.
                        (when (/= from #xAD)
                          (when (eq method 'acronym)
                            (setq this-method
                                  (or (aref char-acronym-table from)
                                      "UNK")))
                          ;; For `bidi-control', touch only the
                          ;; characters listed as bidi controls.
                          (when (or (eq target 'format-control)
                                    (memq from
                                          glyphless--bidi-control-characters))
                            (set-char-table-range glyphless-char-display
                                                  from this-method)))
                        (setq from (1+ from))))))
                unicode-category-table)))
            ((eq target 'no-font)
             (set-char-table-extra-slot glyphless-char-display 0 method))
            (t
             (error "Invalid glyphless character group: %s" target))))))

(defun glyphless-set-char-table-range (chartable from to method)
  "Set the display method of characters FROM..TO in CHARTABLE to METHOD.
If METHOD is the symbol `acronym', store for each character its
own entry from `char-acronym-table' (which may be nil) instead of
the symbol; otherwise store METHOD for the whole range at once."
  (if (eq method 'acronym)
      (let ((i from))
        (while (<= i to)
          (set-char-table-range chartable i (aref char-acronym-table i))
          (setq i (1+ i))))
    (set-char-table-range chartable (cons from to) method)))

;;; Control of displaying glyphless characters.
(define-widget 'glyphless-char-display-method 'lazy
  "Display method for glyphless characters."
  :group 'mule
  :format "%v"
  :value 'thin-space
  :type
  '(choice
    (const :tag "Don't display" zero-width)
    (const :tag "Display as thin space" thin-space)
    (const :tag "Display as empty box" empty-box)
    (const :tag "Display acronym" acronym)
    (const :tag "Display hex code in a box" hex-code)))

(defcustom glyphless-char-display-control
  '((format-control . thin-space)
    (variation-selectors . thin-space)
    (no-font . hex-code))
  "List of directives to control display of glyphless characters.

Each element has the form (GROUP . METHOD), where GROUP is a
symbol specifying the character group, and METHOD is a symbol
specifying the method of displaying characters belonging to that
group.

GROUP must be one of these symbols:
  `c0-control':     U+0000..U+001F, but excluding newline and TAB.
  `c1-control':     U+0080..U+009F.
  `format-control': Characters of Unicode General Category `Cf',
                    such as U+200C (ZWNJ), U+200E (LRM), but
                    excluding characters that have graphic images,
                    such as U+00AD (SHY).
  `bidi-control':   A subset of `format-control', but only characters
                    that are relevant for bidirectional formatting control,
                    like U+2069 (PDI) and U+202B (RLE).
  `variation-selectors':
                    Characters in the range U+FE00..U+FE0F, used for
                    selecting alternate glyph presentations, such as
                    Emoji vs Text presentation, of the preceding
                    character(s).
  `no-font':        For GUI frames, characters for which no suitable
                    font is found; for text-mode frames, characters
                    that cannot be encoded by `terminal-coding-system'.

METHOD must be one of these symbols:
  `zero-width': don't display.
  `thin-space': display a thin (1-pixel width) space.  On character
                terminals, display as 1-character space.
  `empty-box':  display an empty box.
  `acronym':    display an acronym of the character in a box.  The
                acronym is taken from `char-acronym-table', which see.
  `hex-code':   display the hexadecimal character code in a box.

Do not set its value directly from Lisp; the value takes effect
only via a custom `:set'
function (`update-glyphless-char-display'), which updates
`glyphless-char-display'."
  :version "28.1"
  :type '(alist :key-type (symbol :tag "Character Group")
                :value-type (symbol :tag "Display Method"))
  :options '((c0-control glyphless-char-display-method)
             (c1-control glyphless-char-display-method)
             (format-control glyphless-char-display-method)
             (bidi-control glyphless-char-display-method)
             (variation-selectors glyphless-char-display-method)
             (no-font (glyphless-char-display-method :value hex-code)))
  :set 'update-glyphless-char-display
  :group 'display)


;;; Setting word boundary.

(setq word-combining-categories
      '((nil . ?^)
        (?^ . nil)
        (?C . ?H)
        (?C . ?K)))

(setq word-separating-categories        ;  (2-byte character sets)
      '((?H . ?K)                       ; Hiragana - Katakana
        ))

;; Local Variables:
;; coding: utf-8
;; End:

;;; characters.el ends here