1;;; characters.el --- set syntax and category for multibyte characters  -*- lexical-binding: t; -*-
2
3;; Copyright (C) 1997, 2000-2021 Free Software Foundation, Inc.
4;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
6;;   National Institute of Advanced Industrial Science and Technology (AIST)
7;;   Registration Number H14PRO021
8;; Copyright (C) 2003
9;;   National Institute of Advanced Industrial Science and Technology (AIST)
10;;   Registration Number H13PRO009
11
12;; Keywords: multibyte character, character set, syntax, category
13
14;; This file is part of GNU Emacs.
15
16;; GNU Emacs is free software: you can redistribute it and/or modify
17;; it under the terms of the GNU General Public License as published by
18;; the Free Software Foundation, either version 3 of the License, or
19;; (at your option) any later version.
20
21;; GNU Emacs is distributed in the hope that it will be useful,
22;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24;; GNU General Public License for more details.
25
26;; You should have received a copy of the GNU General Public License
27;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
28
29;;; Commentary:
30
31;;; Code:
32
33;;; Predefined categories.
34
35;; For each character set.
36
;; One category per script / character set.  Each entry is
;; (MNEMONIC DOCSTRING) and is passed straight to `define-category'.
(dolist (spec '((?a "ASCII\nASCII graphic characters 32-126 (ISO646 IRV:1983[4/0])")
                (?l "Latin")
                (?t "Thai")
                (?g "Greek")
                (?b "Arabic")
                (?w "Hebrew")
                (?y "Cyrillic")
                (?k "Katakana\nJapanese katakana")
                (?r "Roman\nJapanese roman")
                (?c "Chinese")
                (?j "Japanese")
                (?h "Korean")
                (?e "Ethiopic\nEthiopic (Ge'ez)")
                (?v "Viet\nVietnamese")
                (?i "Indian")
                (?o "Lao")
                (?q "Tibetan")))
  (apply #'define-category spec))
59
60;; For each group (row) of 2-byte character sets.
61
;; Categories for the groups (rows) of 2-byte character sets, as
;; (MNEMONIC DOCSTRING) entries handed to `define-category'.
(dolist (spec '((?A "2-byte alnum\nAlphanumeric characters of 2-byte character sets")
                (?C "2-byte han\nChinese (Han) characters of 2-byte character sets")
                (?G "2-byte Greek\nGreek characters of 2-byte character sets")
                (?H "2-byte Hiragana\nJapanese Hiragana characters of 2-byte character sets")
                (?K "2-byte Katakana\nJapanese Katakana characters of 2-byte character sets")
                (?N "2-byte Korean\nKorean Hangul characters of 2-byte character sets")
                (?Y "2-byte Cyrillic\nCyrillic characters of 2-byte character sets")
                (?I "Indian Glyphs")))
  (apply #'define-category spec))
77
78;; For phonetic classifications.
79
;; Phonetic classification categories, mnemonics ?0 through ?9, as
;; (MNEMONIC DOCSTRING) entries handed to `define-category'.
(dolist (spec '((?0 "consonant")
                (?1 "base vowel\nBase (independent) vowel")
                (?2 "upper diacritic\nUpper diacritical mark (including upper vowel)")
                (?3 "lower diacritic\nLower diacritical mark (including lower vowel)")
                (?4 "combining tone\nCombining tone mark")
                (?5 "symbol")
                (?6 "digit")
                (?7 "vowel diacritic\nVowel-modifying diacritical mark")
                (?8 "vowel-signs")
                (?9 "semivowel lower")))
  (apply #'define-category spec))
95
96;; For filling.
(dolist (spec '(;; Filling.
                (?| "line breakable
While filling, we can break a line at this character.")
                ;; Indentation calculation.
                (?\s "space for indent
This character counts as a space for indentation purposes.")
                ;; Keep the following for `kinsoku' processing.  See
                ;; comments in kinsoku.el.
                (?> "Not at bol
A character which can't be placed at beginning of line.")
                (?< "Not at eol
A character which can't be placed at end of line.")
                ;; Base and Combining.
                (?. "Base
Base characters (Unicode General Category L,N,P,S,Zs)")
                (?^ "Combining
Combining diacritic or mark (Unicode General Category M)")
                ;; Bidi types.
                (?R "Strong R2L
Characters with \"strong\" right-to-left directionality, i.e.
with R, AL, RLE, or RLO Unicode bidi character type.")
                (?L "Strong L2R
Characters with \"strong\" left-to-right directionality, i.e.
with L, LRE, or LRO Unicode bidi character type.")))
  (apply #'define-category spec))
126
127
128;;; Setting syntax and category.
129
130;; ASCII
131
132;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
;; Code points 32..127 get both the `a' and the `l' category.
(dolist (category '(?a ?l))
  (modify-category-entry '(32 . 127) category))
135
136;; Deal with the CJK charsets first.  Since the syntax of blocks is
137;; defined per charset, and the charsets may contain e.g. Latin
138;; characters, we end up with the wrong syntax definitions if we're
139;; not careful.
140
141;; Chinese characters (Unicode)
;; Each entry is (RANGE CATEGORY...): every listed category is added to
;; every character in RANGE.
(dolist (spec '(((#x2E80 . #x312F) ?|)
                ((#x3190 . #x33FF) ?|)
                ((#x3400 . #x4DB5) ?C)
                ((#x4E00 . #x9FD5) ?C)
                ((#x3400 . #x9FD5) ?c ?|)
                ((#xF900 . #xFAFF) ?C ?c ?|)
                ((#x1B170 . #x1B2FF) ?c)
                ((#x20000 . #x2FFFF) ?| ?C ?c)))
  (dolist (category (cdr spec))
    (modify-category-entry (car spec) category)))
155
156
157;; Chinese character set (GB2312)
158
;; Code ranges of chinese-gb2312 that get symbol syntax.
(dolist (codes '((#x2121 . #x217E)
                 (#x2221 . #x227E)
                 (#x2921 . #x297E)))
  (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_"
                     (car codes) (cdr codes)))

;; Every character of the charset is `c'; the entries below add a
;; further category to a code range, as (CATEGORY FROM-CODE TO-CODE).
(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
(dolist (spec '((?A #x2330 #x2339)
                (?A #x2341 #x235A)
                (?A #x2361 #x237A)
                (?H #x2421 #x247E)
                (?K #x2521 #x257E)
                (?G #x2621 #x267E)
                (?Y #x2721 #x277E)
                (?C #x3021 #x7E7E)))
  (apply #'map-charset-chars #'modify-category-entry 'chinese-gb2312 spec))
172
173;; Chinese character set (BIG5)
174
;; All of big5 is `c'; the listed code ranges additionally get `C'.
(map-charset-chars #'modify-category-entry 'big5 ?c)
(dolist (codes '((#xA259 . #xA261)
                 (#xA440 . #xC67E)
                 (#xC940 . #xF9DC)))
  (map-charset-chars #'modify-category-entry 'big5 ?C
                     (car codes) (cdr codes)))
179
180;; Chinese character set (CNS11643)
181
;; Every plane is `c'.  `C' covers a whole plane, except for plane 1
;; where it is restricted to codes #x4421..#x7E7E.
(dolist (charset '(chinese-cns11643-1 chinese-cns11643-2
                   chinese-cns11643-3 chinese-cns11643-4
                   chinese-cns11643-5 chinese-cns11643-6
                   chinese-cns11643-7))
  (map-charset-chars #'modify-category-entry charset ?c)
  (apply #'map-charset-chars #'modify-category-entry charset ?C
         (and (eq charset 'chinese-cns11643-1)
              '(#x4421 #x7E7E))))
189
190;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
191
;; Each entry is (CHARSET . CATEGORY): the category is added to every
;; character of the charset.
(dolist (entry '((katakana-jisx0201 . ?k)
                 (latin-jisx0201 . ?r)
                 (katakana-jisx0201 . ?j)
                 (japanese-jisx0208 . ?j)
                 (japanese-jisx0212 . ?j)
                 (japanese-jisx0213-1 . ?j)
                 (japanese-jisx0213-2 . ?j)
                 (japanese-jisx0213.2004-1 . ?j)
                 (cp932-2-byte . ?j)))
  (map-charset-chars #'modify-category-entry (car entry) (cdr entry)))
201
202;; Fullwidth characters
(modify-category-entry '(#xff01 . #xff60) ?\|)

;; Unicode equivalents of JISX0201-kana
(dolist (category '(?k ?j ?\|))
  (modify-category-entry '(#xff61 . #xff9f) category))
210
211;; Katakana block
;; Katakana: characters and ranges that get `K', then those that are
;; also line-breakable.
(dolist (chars '((#x3099 . #x309C)
                 (#x30A0 . #x30FF)
                 (#x31F0 . #x31FF)
                 (#x1AFF0 . #x1B000)
                 (#x1B120 . #x1B122)
                 (#x1B164 . #x1B167)))
  (modify-category-entry chars ?K))
(dolist (chars '((#x30A0 . #x30FA) #x30FF))
  (modify-category-entry chars ?\|))

;; Hiragana block
(dolist (chars '((#x3040 . #x309F)
                 #x30A0
                 #x30FC
                 #x1B001
                 #x1B11F
                 (#x1B150 . #x1B152)
                 (#x1B002 . #x1B11E))) ; Hentaigana
  (modify-category-entry chars ?H))
(dolist (chars '((#x3040 . #x3096) #x309F))
  (modify-category-entry chars ?\|))

(modify-category-entry '(#x1AFF0 . #x1B1FF) ?j)
233
234
235;; JISX0208
236;; Note: Some of these have their syntax updated later below.
;; Symbol syntax for two code ranges of japanese-jisx0208.
(dolist (codes '((#x2121 . #x227E)
                 (#x2821 . #x287E)))
  (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_"
                     (car codes) (cdr codes)))

;; Sound marks, iteration marks and similar characters get word
;; syntax again.  They are given as code points because the character
;; literals of this list were unreadable.
;; NOTE(review): restored from the element count (12) and the last
;; element, which survived -- confirm against upstream.
(dolist (ch '(#x30FC   ; KATAKANA-HIRAGANA PROLONGED SOUND MARK
              #x309B   ; KATAKANA-HIRAGANA VOICED SOUND MARK
              #x309C   ; KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
              #x30FD   ; KATAKANA ITERATION MARK
              #x30FE   ; KATAKANA VOICED ITERATION MARK
              #x309D   ; HIRAGANA ITERATION MARK
              #x309E   ; HIRAGANA VOICED ITERATION MARK
              #x3003   ; DITTO MARK
              #x4EDD   ; CJK UNIFIED IDEOGRAPH-4EDD
              #x3005   ; IDEOGRAPHIC ITERATION MARK
              #x3006   ; IDEOGRAPHIC CLOSING MARK
              #x3007)) ; IDEOGRAPHIC NUMBER ZERO
  (modify-syntax-entry ch "w"))

;; Per-row categories, as (CATEGORY FROM-CODE TO-CODE).
(dolist (spec '((?A #x2321 #x237E)
                (?H #x2421 #x247E)
                (?K #x2521 #x257E)
                (?G #x2621 #x267E)
                (?Y #x2721 #x277E)
                (?C #x3021 #x7E7E)))
  (apply #'map-charset-chars #'modify-category-entry 'japanese-jisx0208 spec))

;; Ideograph-like characters outside the range above that also get `C'.
;; NOTE(review): restored from the element count (4) -- confirm.
(dolist (ch '(#x4EDD #x3005 #x3006 #x3007))
  (modify-category-entry ch ?C))
253
254;; JISX0212
255
(map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)

;; JISX0201-Kana

;; The halfwidth forms are written as code points so that they cannot
;; be confused with (or folded into) their fullwidth counterparts.
;; NOTE(review): the first two elements were unreadable and are
;; restored from context -- confirm against upstream.
(dolist (ch '(#xFF61    ; HALFWIDTH IDEOGRAPHIC FULL STOP
              #xFF64    ; HALFWIDTH IDEOGRAPHIC COMMA
              #xFF65))  ; HALFWIDTH KATAKANA MIDDLE DOT
  (modify-syntax-entry ch "."))

;; HALFWIDTH LEFT/RIGHT CORNER BRACKET form a paren pair.  The closing
;; bracket must use the close-paren class ")", not "(".
(modify-syntax-entry #xFF62 (string ?\( #xFF63))
(modify-syntax-entry #xFF63 (string ?\) #xFF62))
267
268;; Korean character set (KSC5601)
269
;; Every character of korean-ksc5601 is `h'.
(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)

;; Code ranges that get symbol syntax.
(dolist (codes '((#x2121 . #x227E)
                 (#x2621 . #x277E)
                 (#x2830 . #x287E)
                 (#x2930 . #x2975)))
  (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_"
                     (car codes) (cdr codes)))

;; Additional categories, as (CATEGORY FROM-CODE TO-CODE).
(dolist (spec '((?A #x2330 #x2339)
                (?A #x2341 #x235A)
                (?A #x2361 #x237A)
                (?G #x2521 #x257E)
                (?H #x2A21 #x2A7E)
                (?K #x2B21 #x2B7E)
                (?Y #x2C21 #x2C7E)))
  (apply #'map-charset-chars #'modify-category-entry 'korean-ksc5601 spec))
283
284;; These are in more than one charset.
;; PARENS is a flat sequence of opener/closer pairs.  The last group
;; is the FULLWIDTH parenthesis, square bracket and curly bracket; it
;; is built from code points so that it cannot be confused with (or
;; folded into) the ASCII characters "()[]{}".
(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
                      "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
                      (string #xFF08 #xFF09    ; FULLWIDTH PARENTHESES
                              #xFF3B #xFF3D    ; FULLWIDTH SQUARE BRACKETS
                              #xFF5B #xFF5D)))) ; FULLWIDTH CURLY BRACKETS
  (dotimes (i (/ (length parens) 2))
    (let ((open (aref parens (* i 2)))
          (close (aref parens (1+ (* i 2)))))
      ;; Each half of the pair names the other one as its match.
      (modify-syntax-entry open (string ?\( close))
      (modify-syntax-entry close (string ?\) open)))))
294
295;; Arabic character set
296
;; The Arabic charsets, and then the listed Unicode ranges, get `b'.
(dolist (charset '(arabic-iso8859-6
                   arabic-digit
                   arabic-1-column
                   arabic-2-column))
  (map-charset-chars #'modify-category-entry charset ?b))
(dolist (range '((#x600 . #x6ff)
                 (#x870 . #x8ff)
                 (#xfb50 . #xfdff)
                 (#xfe70 . #xfefe)))
  (modify-category-entry range ?b))
308
309;; Cyrillic character set (ISO-8859-5)
310
;; The character literal here was lost, which left `?"' followed by an
;; unterminated string.  NOTE(review): restored as U+2116 NUMERO SIGN,
;; the one non-letter symbol specific to ISO-8859-5 -- confirm against
;; upstream.
(modify-syntax-entry #x2116 ".")
312
313;; Ethiopic character set
314
;; Unicode ranges that get the `e' category.
(dolist (range '((#x1200 . #x1399)
                 (#x2D80 . #x2DDE)
                 (#xAB01 . #xAB2E)
                 (#x1E7E0 . #x1E7FE)))
  (modify-category-entry range ?e))

;; Ethiopic punctuation, U+1361 ETHIOPIC WORDSPACE through U+1368
;; ETHIOPIC PARAGRAPH SEPARATOR.  Written as a code-point range because
;; the character literals of this list were unreadable.
;; NOTE(review): restored from the element count (8) and the last
;; element, which survived -- confirm against upstream.
(modify-syntax-entry '(#x1361 . #x1368) ".")

(map-charset-chars #'modify-category-entry 'ethiopic ?e)
324
325;; Hebrew character set (ISO-8859-8)
326
;; Hebrew characters that get punctuation syntax.
(dolist (ch '(#x5be    ; MAQAF
              #x5c0    ; PASEQ
              #x5c3    ; SOF PASUQ
              #x5c6    ; NUN HAFUKHA
              #x5f3    ; GERESH
              #x5f4))  ; GERSHAYIM
  (modify-syntax-entry ch "."))
333
334;; Indian character set (IS 13194 and other Emacs original Indian charsets)
335
;; The range #x901..#x970 and both Indian charsets get `i'.
(modify-category-entry '(#x901 . #x970) ?i)
(dolist (charset '(indian-is13194 indian-2-column))
  (map-charset-chars #'modify-category-entry charset ?i))
339
340;; Lao character set
341
(modify-category-entry '(#xe80 . #xeff) ?o)
(map-charset-chars #'modify-category-entry 'lao ?o)

;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "X-Y" stands for every character from X through Y.
;; CATEGORY is added to each character; SYNTAX is applied only when it
;; is not "w".
(let (ch to)
  (dolist (def '(("ກ-ຮ"	"w"	?0) ; consonant
                 ("ະາຳຽເ-ໄ"	"w"	?1) ; vowel base
                 ("ັິ-ືົໍ"	"w"	?2) ; vowel upper
                 ("ຸູ"	"w"	?3) ; vowel lower
                 ("່-໋"	"w"	?4) ; tone mark
                 ("ຼຽ"	"w"	?9) ; semivowel lower
                 ("໐-໙"	"w"	?6) ; digit
                 ("ຯໆ"	"_"	?5) ; symbol
                 ))
    (let ((chars (nth 0 def))
          (syntax (nth 1 def))
          (category (nth 2 def))
          (pos 0))
      (while (< pos (length chars))
        (if (/= (aref chars pos) ?-)
            ;; A plain character: process just this one.
            (setq ch (aref chars pos)
                  to ch)
          ;; A dash: CH already points one past the range start, which
          ;; was handled on the previous iteration; run up to the
          ;; character after the dash.
          (setq pos (1+ pos)
                to (aref chars pos)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))
375
376;; Thai character set (TIS620)
377
(modify-category-entry '(#xe00 . #xe7f) ?t)
(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)

;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "X-Y" stands for every character from X through Y.
;; CATEGORY is added to each character; SYNTAX is applied only when it
;; is not "w".
(let (ch to)
  (dolist (def '(;; chars	syntax	category
                 ("ก-รลว-ฮ"	"w"	?0) ; consonant
                 ("ฤฦะาำเ-ๅ"	"w"	?1) ; vowel base
                 ("ัิ-ื็๎"	"w"	?2) ; vowel upper
                 ("ุ-ฺ"	"w"	?3) ; vowel lower
                 ("่-ํ"	"w"	?4) ; tone mark
                 ("๐-๙"	"w"	?6) ; digit
                 ("ฯๆ฿๏๚๛"	"_"	?5) ; symbol
                 ))
    (let ((chars (nth 0 def))
          (syntax (nth 1 def))
          (category (nth 2 def))
          (pos 0))
      (while (< pos (length chars))
        (if (/= (aref chars pos) ?-)
            ;; A plain character: process just this one.
            (setq ch (aref chars pos)
                  to ch)
          ;; A dash: CH already points one past the range start, which
          ;; was handled on the previous iteration; run up to the
          ;; character after the dash.
          (setq pos (1+ pos)
                to (aref chars pos)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))
411
412;; Tibetan character set
413
(modify-category-entry '(#xf00 . #xfff) ?q)
(dolist (charset '(tibetan tibetan-1-column))
  (map-charset-chars #'modify-category-entry charset ?q))

;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS lists single
;; characters; "X-Y" stands for every character from X through Y.
;; CATEGORY is added to each character; SYNTAX is applied only when it
;; is not "w".
(let (ch to)
  (dolist (def '(;; chars             syntax category
                 ("ཀ-ཀྵཪ"        	"w"	?0) ; consonant
                 ("ྐ-ྐྵྺྻྼ"       "w"     ?0) ;
                 ("ིེཻོཽྀ"       "w"	?2) ; upper vowel
                 ("ཾྂྃ྆྇ྈྉྊྋ" "w"	?2) ; upper modifier
                 ("྄ཱུ༙༵༷"       "w"	?3) ; lower vowel/modifier
                 ("཰"		"w" ?3)		    ; invisible vowel a
                 ("༠-༩༪-༳"	        "w"	?6) ; digit
                 ("་།-༒༔ཿ"        "."     ?|) ; line-break char
                 ("་།༏༐༑༔ཿ"            "."     ?|) ;
                 ("༈་།-༒༔ཿ༽༴"  "."     ?>) ; prohibition
                 ("་།༏༐༑༔ཿ"            "."     ?>) ;
                 ("ༀ-༊༼࿁࿂྅"      "."     ?<) ; prohibition
                 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
                 ))
    (let ((chars (nth 0 def))
          (syntax (nth 1 def))
          (category (nth 2 def))
          (pos 0))
      (while (< pos (length chars))
        (if (/= (aref chars pos) ?-)
            ;; A plain character: process just this one.
            (setq ch (aref chars pos)
                  to ch)
          ;; A dash: CH already points one past the range start, which
          ;; was handled on the previous iteration; run up to the
          ;; character after the dash.
          (setq pos (1+ pos)
                to (aref chars pos)))
        (while (<= ch to)
          (or (string-equal syntax "w")
              (modify-syntax-entry ch syntax))
          (modify-category-entry ch category)
          (setq ch (1+ ch)))
        (setq pos (1+ pos))))))
454
455;; Vietnamese character set
456
457;; To make a word with Latin characters
;; Both VISCII halves get `l' as well as `v'.
(dolist (charset '(vietnamese-viscii-lower vietnamese-viscii-upper))
  (dolist (category '(?l ?v))
    (map-charset-chars #'modify-category-entry charset category)))

;; For every code 32..127, pair the upper-case character with the
;; lower-case character at the same code, and give `v' to those that
;; `encode-char' can map to `ucs'.
(let ((tbl (standard-case-table)))
  (dotimes (offset 96)
    (let* ((code (+ 32 offset))
           (upper (decode-char 'vietnamese-viscii-upper code))
           (lower (decode-char 'vietnamese-viscii-lower code))
           (uc (encode-char upper 'ucs))
           (lc (encode-char lower 'ucs)))
      (set-case-syntax-pair upper lower tbl)
      (when uc
        (modify-category-entry uc ?v))
      (when lc
        (modify-category-entry lc ?v)))))
476
477;; Tai Viet
;; Each entry is (CHARS SYNTAX CATEGORY).  CHARS is either a cons
;; (FROM . TO) naming an inclusive range, or a string whose characters
;; are handled one by one.  The ranges are written as code points: the
;; FROM character literals had been lost, which turned each cons into a
;; two-element list that is not a valid range.
;; NOTE(review): the range starts are restored from the Tai Viet block
;; layout -- confirm against upstream.
(let ((deflist '(;; chars	syntax	category
		 ((#xAA80 . #xAAAF)	"w"	?0) ; consonant
		 ("ꪱꪵꪶ"		"w"	?1) ; vowel base
		 ((#xAAB9 . #xAABD)	"w"	?1) ; vowel base
		 ("ꪰꪲꪳꪷꪸꪾ"	"w"	?2) ; vowel upper
		 ("ꪴ"		"w"	?3) ; vowel lower
		 ("ꫀꫂ"		"w"	?1) ; non-combining tone-mark
		 ("꪿꫁"		"w"	?4) ; combining tone-mark
		 ((#xAADB . #xAADF)	"_"	?5) ; symbol
		 )))
  (dolist (elm deflist)
    (let ((chars (car elm))
	  (syntax (nth 1 elm))
	  (category (nth 2 elm)))
      (if (consp chars)
	  ;; A range can be handed over as a whole.
	  (progn
	    (modify-syntax-entry chars syntax)
	    (modify-category-entry chars category))
        ;; A string is processed character by character.
        (mapc (lambda (x)
                (modify-syntax-entry x syntax)
                (modify-category-entry x category))
	      chars)))))
500
501;; Bidi categories
502
503;; If bootstrapping without generated uni-*.el files, table not defined.
;; Derive the `R' and `L' categories from the bidi-class property.
;; Nothing is done when the property table is unavailable.
(let ((bidi-classes (unicode-property-table-internal 'bidi-class)))
  (if bidi-classes
      (map-char-table
       (lambda (chars class)
         (let ((category (cond ((memq class '(R AL RLO RLE)) ?R)
                               ((memq class '(L LRE LRO)) ?L))))
           (if category
               (modify-category-entry chars category))))
       bidi-classes)))
513
514;; Load uni-mirrored.el and uni-brackets.el if available, so that they
515;; get dumped into Emacs.  This allows starting Emacs with
516;; force-load-messages in ~/.emacs, and avoid infinite recursion in
517;; bidi_initialize, which needs to load uni-mirrored.el and
518;; uni-brackets.el in order to display the "Loading" messages.
;; Request both tables for their loading side effect; the return
;; values are not used.
(dolist (property '(mirroring bracket-type))
  (unicode-property-table-internal property))
521
522;; Latin
523
(modify-category-entry '(#x80 . #x024F) ?l)

(let* ((tbl (standard-case-table))
       ;; Call `set-case-syntax' with SYNTAX and TBL for every character
       ;; from FROM through TO (inclusive).  Both bounds are always
       ;; passed explicitly, so no loop depends on where an earlier one
       ;; happened to stop.
       (set-syntax-range
        (lambda (from to syntax)
          (let ((ch from))
            (while (<= ch to)
              (set-case-syntax ch syntax tbl)
              (setq ch (1+ ch)))))))

  ;; Latin-1

  ;; Fixme: Some of the non-word syntaxes here perhaps should be
  ;; reviewed.  (Note that the following all implicitly have word
  ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.)  There should be a well-defined way of
  ;; relating Unicode categories to Emacs syntax codes.

  ;; FIXME: We should probably just use the Unicode properties to set
  ;; up the syntax table.

  ;; French wants
  ;;   (set-case-syntax-delims ?« ?» tbl)
  ;; And German wants
  ;;   (set-case-syntax-delims ?» ?« tbl)
  ;; So let's stay neutral and let users set these up if/when they
  ;; want to: both guillemets are plain punctuation here.
  (dolist (entry '((#xA1 . ".")    ; INVERTED EXCLAMATION MARK
                   (#xA6 . "_")    ; BROKEN BAR
                   (#xA7 . ".")    ; SECTION SIGN
                   (#xA9 . "_")    ; COPYRIGHT SIGN
                   (#xAB . ".")    ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
                   (#xBB . ".")    ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
                   (#xAC . "_")    ; NOT SIGN
                   (#xAD . "_")    ; SOFT HYPHEN
                   (#xAE . "_")    ; REGISTERED SIGN
                   (#xB0 . "_")    ; DEGREE SIGN
                   (#xB1 . "_")    ; PLUS-MINUS SIGN
                   (#xB5 . "_")    ; MICRO SIGN
                   (#xB7 . "_")    ; MIDDLE DOT
                   (#xBC . "_")    ; VULGAR FRACTION ONE QUARTER
                   (#xBD . "_")    ; VULGAR FRACTION ONE HALF
                   (#xBE . "_")    ; VULGAR FRACTION THREE QUARTERS
                   (#xBF . ".")    ; INVERTED QUESTION MARK
                   (#xD7 . "_")    ; MULTIPLICATION SIGN
                   (#xDF . "w")    ; LATIN SMALL LETTER SHARP S
                   (#xF7 . "_")))  ; DIVISION SIGN
    (set-case-syntax (car entry) (cdr entry) tbl))
  ;; See below for ÿ.

  ;; Script categories for whole ranges.
  (dolist (entry '(((#x0100 . #x02B8) . ?l)   ; Latin Extended-A, Extended-B
                   ((#x1E00 . #x1EF9) . ?l)   ; Latin Extended Additional
                   ((#x2C60 . #x2C7F) . ?l)   ; Latin Extended-C
                   ((#xA720 . #xA7FF) . ?l)   ; Latin Extended-D
                   ((#xAB30 . #xAB64) . ?l)   ; Latin Extended-E
                   ((#x1DF00 . #x1DFFF) . ?l) ; Latin Extended-G
                   ((#x0370 . #x03FF) . ?g)   ; Greek
                   ((#x1F00 . #x1FFF) . ?g)   ; Greek Extended
                   ((#x0400 . #x04FF) . ?y)   ; Cyrillic
                   ((#xA640 . #xA69F) . ?y)
                   ((#x1C80 . #x1C8F) . ?y))) ; Cyrillic Extended-C
    (modify-category-entry (car entry) (cdr entry)))
  ;; Armenian and Georgian get no category here.

  ;; space characters (see section 6.2 in the Unicode Standard)
  ;; NO-BREAK SPACE is given as a code point so that it cannot be
  ;; mistaken for an ordinary space.
  (set-case-syntax #xA0 " " tbl)
  (funcall set-syntax-range #x2000 #x200B " ")
  (dolist (ch '(#x202F #x205F #x3000))
    (set-case-syntax ch " " tbl))

  ;; general punctuation
  (funcall set-syntax-range #x200C #x200F ".")
  ;; Fixme: What to do with characters that have Pi and Pf
  ;; Unicode properties?
  (funcall set-syntax-range #x2010 #x2017 ".")
  ;; Punctuation syntax for quotation marks (like `)
  (funcall set-syntax-range #x2018 #x201F ".")
  (funcall set-syntax-range #x2020 #x2027 ".")
  (funcall set-syntax-range #x2030 #x205E ".")
  ;; A few characters of the range above are symbols instead.
  ;; NOTE(review): only the last element of this list was readable;
  ;; the first three are restored from the element count (4) --
  ;; confirm against upstream.
  (dolist (ch '(#x2039    ; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
                #x203A    ; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
                #x2044    ; FRACTION SLASH
                #x2052))  ; COMMERCIAL MINUS SIGN
    (modify-syntax-entry ch "_"))

  ;; Arrows
  (funcall set-syntax-range #x2190 #x21FF "_")
  ;; Mathematical Operators
  (funcall set-syntax-range #x2200 #x22FF "_")
  ;; Miscellaneous Technical
  (funcall set-syntax-range #x2300 #x23FF "_")
  ;; Control Pictures
  (funcall set-syntax-range #x2400 #x244F "_")

  ;; Circled Latin: capitals, then the small letters 26 places later.
  (modify-category-entry '(#x24B6 . #x24CF) ?l)
  (modify-category-entry '(#x24D0 . #x24E9) ?l)

  ;; Supplemental Mathematical Operators
  (funcall set-syntax-range #x2A00 #x2AFF "_")

  ;; Miscellaneous Symbols and Arrows
  (funcall set-syntax-range #x2B00 #x2BFF "_")

  ;; Coptic
  ;; There's no Coptic category.  However, Coptic letters that are
  ;; part of the Greek block above get the Greek category, and those
  ;; in this block are derived from Greek letters, so let's be
  ;; consistent about their category.
  (modify-category-entry '(#x2C80 . #x2CFF) ?g)

  ;; Supplemental Punctuation
  (funcall set-syntax-range #x2E00 #x2E7F ".")

  ;; Ideographic punctuation
  (funcall set-syntax-range #x3001 #x3003 ".")
  (set-case-syntax #x30FB "." tbl)

  ;; Symbols for Legacy Computing
  (funcall set-syntax-range #x1FB00 #x1FBCA "_")
  ;; FIXME: Should these be digits?
  (funcall set-syntax-range #x1FBCB #x1FBFF ".")

  ;; Fullwidth Latin.  The single-character calls come after the range
  ;; on purpose: #xFF04 and #xFF0B override the "." set just before.
  (funcall set-syntax-range #xFF01 #xFF0F ".")
  (set-case-syntax #xFF04 "_" tbl)
  (set-case-syntax #xFF0B "_" tbl)
  (set-case-syntax #xFF1A "." tbl)
  (set-case-syntax #xFF1B "." tbl)
  (set-case-syntax #xFF1F "." tbl)
  (set-case-syntax #xFF20 "." tbl)
  ;; Fullwidth capitals, then the small letters #x20 places later.
  (modify-category-entry '(#xFF21 . #xFF3A) ?l)
  (modify-category-entry '(#xFF41 . #xFF5A) ?l)

  ;; Halfwidth Latin
  (funcall set-syntax-range #xFF64 #xFF65 ".")
  (set-case-syntax #xFF61 "." tbl)

  ;; Combining diacritics
  (modify-category-entry '(#x300 . #x362) ?^)
  ;; Combining marks
  (modify-category-entry '(#x20d0 . #x20ff) ?^)

  (let ((gc (unicode-property-table-internal 'general-category))
        (syn-table (standard-syntax-table)))
    ;; In early bootstrapping Unicode tables are not available so we need to
    ;; skip this step in those cases.
    (when gc
      ;; Set all Letter, uppercase; Letter, lowercase and Letter,
      ;; titlecase syntax to word.
      (map-char-table
       (lambda (ch cat)
         (when (memq cat '(Lu Ll Lt))
           (modify-syntax-entry ch "w   " syn-table)))
       gc)
      ;; U+2160 through U+216B (Roman numerals one to twelve) had word
      ;; syntax in the past so set it here as well.  The general
      ;; category of those characters is Number, Letter.
      (modify-syntax-entry '(#x2160 . #x216b) "w   " syn-table)

      ;; U+24D0 through U+24E9 (circled small letters) are symbols,
      ;; other according to Unicode but Emacs set their syntax to word
      ;; in the past so keep backwards compatibility.
      (modify-syntax-entry '(#x24D0 . #x24E9) "w   " syn-table)

      ;; Set downcase and upcase from Unicode properties

      ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I and
      ;; U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so do U+0130
      ;; LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.

      ;; We used to set up half of those correspondence unconditionally, but
      ;; that makes searches slow.  So now we don't set up either half of these
      ;; correspondences by default.

      ;; (set-downcase-syntax  ?İ ?i tbl)
      ;; (set-upcase-syntax    ?I ?ı tbl)

      (let ((map-unicode-property
             ;; Call FUNC with (CHAR CASED) for every character at or
             ;; above 128 whose PROPERTY value CASED is itself above 127.
             (lambda (property func)
               (map-char-table
                (lambda (ch cased)
                  ;; ASCII characters skipped due to reasons outlined above.  As
                  ;; of Unicode 9.0, this exception affects the following:
                  ;;   lc(U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE) = i
                  ;;   uc(U+0131 LATIN SMALL LETTER DOTLESS I) = I
                  ;;   uc(U+017F LATIN SMALL LETTER LONG S) = S
                  ;;   lc(U+212A KELVIN SIGN) = k
                  (when (> cased 127)
                    ;; The key is either one character or a (FROM . TO) range.
                    (let ((end (if (consp ch) (cdr ch) ch)))
                      (setq ch (max 128 (if (consp ch) (car ch) ch)))
                      (while (<= ch end)
                        (funcall func ch cased)
                        (setq ch (1+ ch))))))
                (unicode-property-table-internal property))))
            (down tbl)
            (up (case-table-get-table tbl 'up)))

        ;; This works on an assumption that if toUpper(x) != x then toLower(x)
        ;; == x (and the opposite for toLower/toUpper).  This doesn't hold for
        ;; title case characters but those incorrect mappings will be
        ;; overwritten later.
        (funcall map-unicode-property 'uppercase
                 (lambda (lc uc) (aset down lc lc) (aset up uc uc)))
        (funcall map-unicode-property 'lowercase
                 (lambda (uc lc) (aset down lc lc) (aset up uc uc)))

        ;; Now deal with the actual mapping.  This will correctly assign casing
        ;; for title-case characters.
        (funcall map-unicode-property 'uppercase
                 (lambda (lc uc) (aset up lc uc) (aset up uc uc)))
        (funcall map-unicode-property 'lowercase
                 (lambda (uc lc) (aset down uc lc) (aset down lc lc)))

        ;; Override the Unicode uppercase property for ß, since we are
        ;; using our case tables for determining the case of a
        ;; character (see uppercasep and lowercasep in buffer.h).
        ;; The special-uppercase property of ß ensures that it is
        ;; still upcased to SS per the usual convention.
        ;; #xDF is LATIN SMALL LETTER SHARP S, #x1E9E the capital one.
        (aset up #xDF #x1E9E))))

  ;; Clear out the extra slots so that they will be recomputed from the main
  ;; (downcase) table and upcase table.  Since we're side-stepping the usual
  ;; set-case-syntax-* functions, we need to do it explicitly.
  (set-char-table-extra-slot tbl 1 nil)
  (set-char-table-extra-slot tbl 2 nil)

  ;; Fixme: syntax for symbols &c
  )
833
;; Opening/closing delimiter pairs, as (OPEN . CLOSE) code points.
;; Code points are used instead of literal characters because several
;; literals no longer matched the code points documented next to them
;; (e.g. the FULLWIDTH parentheses appeared as ASCII "()").  The values
;; below are the documented ones.
(dolist (pair '((#x2045 . #x2046)
                (#x207D . #x207E)
                (#x208D . #x208E)
                (#x2329 . #x232A)
                (#x23B4 . #x23B5)
                (#x2768 . #x2769)
                (#x276A . #x276B)
                (#x276C . #x276D)
                (#x2770 . #x2771)
                (#x2772 . #x2773)
                (#x2774 . #x2775)
                (#x27E6 . #x27E7)
                (#x27E8 . #x27E9)
                (#x27EA . #x27EB)
                (#x2983 . #x2984)
                (#x2985 . #x2986)
                (#x2987 . #x2988)
                (#x2989 . #x298A)
                (#x298B . #x298C)
                (#x298D . #x298E)
                (#x298F . #x2990)
                (#x2991 . #x2992)
                (#x2993 . #x2994)
                (#x2995 . #x2996)
                (#x2997 . #x2998)
                (#x29FC . #x29FD)
                (#x3008 . #x3009)
                (#x300A . #x300B)
                (#x300C . #x300D)
                (#x300E . #x300F)
                (#x3010 . #x3011)
                (#x3014 . #x3015)
                (#x3016 . #x3017)
                (#x3018 . #x3019)
                (#x301A . #x301B)
                (#xFD3E . #xFD3F)
                (#xFE35 . #xFE36)
                (#xFE37 . #xFE38)
                (#xFE39 . #xFE3A)
                (#xFE3B . #xFE3C)
                (#xFE3D . #xFE3E)
                (#xFE3F . #xFE40)
                (#xFE41 . #xFE42)
                (#xFE43 . #xFE44)
                (#xFE59 . #xFE5A)
                (#xFE5B . #xFE5C)
                (#xFE5D . #xFE5E)
                (#xFF08 . #xFF09)
                (#xFF3B . #xFF3D)
                (#xFF5B . #xFF5D)
                (#xFF5F . #xFF60)
                (#xFF62 . #xFF63)))
  ;; Each half of the pair names the other one as its match.
  (modify-syntax-entry (car pair) (string ?\( (cdr pair)))
  (modify-syntax-entry (cdr pair) (string ?\) (car pair))))
891
892
893;; For each character set, put the information of the most proper
894;; coding system to encode it by `preferred-coding-system' property.
895
896;; Fixme: should this be junked?
;; Each entry is (CHARSET . CODING-SYSTEM); CODING-SYSTEM is recorded
;; as the `preferred-coding-system' charset property of CHARSET.
(dolist (entry
         '((latin-iso8859-1         . iso-latin-1)
           (latin-iso8859-2         . iso-latin-2)
           (latin-iso8859-3         . iso-latin-3)
           (latin-iso8859-4         . iso-latin-4)
           (thai-tis620             . thai-tis620)
           (greek-iso8859-7         . greek-iso-8bit)
           (arabic-iso8859-6        . iso-2022-7bit)
           (hebrew-iso8859-8        . hebrew-iso-8bit)
           (katakana-jisx0201       . japanese-shift-jis)
           (latin-jisx0201          . japanese-shift-jis)
           (cyrillic-iso8859-5      . cyrillic-iso-8bit)
           (latin-iso8859-9         . iso-latin-5)
           (japanese-jisx0208-1978  . iso-2022-jp)
           (chinese-gb2312          . chinese-iso-8bit)
           (chinese-gbk             . chinese-gbk)
           (gb18030-2-byte          . chinese-gb18030)
           (gb18030-4-byte-bmp      . chinese-gb18030)
           (gb18030-4-byte-smp      . chinese-gb18030)
           (gb18030-4-byte-ext-1    . chinese-gb18030)
           (gb18030-4-byte-ext-2    . chinese-gb18030)
           (japanese-jisx0208       . iso-2022-jp)
           (korean-ksc5601          . iso-2022-kr)
           (japanese-jisx0212       . iso-2022-jp)
           (chinese-big5-1          . chinese-big5)
           (chinese-big5-2          . chinese-big5)
           (chinese-sisheng         . iso-2022-7bit)
           (ipa                     . iso-2022-7bit)
           (vietnamese-viscii-lower . vietnamese-viscii)
           (vietnamese-viscii-upper . vietnamese-viscii)
           (arabic-digit            . iso-2022-7bit)
           (arabic-1-column         . iso-2022-7bit)
           (lao                     . lao)
           (arabic-2-column         . iso-2022-7bit)
           (indian-is13194          . devanagari)
           (indian-glyph            . devanagari)
           (tibetan-1-column        . tibetan)
           (ethiopic                . iso-2022-7bit)
           (chinese-cns11643-1      . iso-2022-cn)
           (chinese-cns11643-2      . iso-2022-cn)
           (chinese-cns11643-3      . iso-2022-cn)
           (chinese-cns11643-4      . iso-2022-cn)
           (chinese-cns11643-5      . iso-2022-cn)
           (chinese-cns11643-6      . iso-2022-cn)
           (chinese-cns11643-7      . iso-2022-cn)
           (indian-2-column         . devanagari)
           (tibetan                 . tibetan)
           (latin-iso8859-14        . iso-latin-8)
           (latin-iso8859-15        . iso-latin-9)))
  (put-charset-property (car entry) 'preferred-coding-system (cdr entry)))
948
949
950;; Setup auto-fill-chars for charsets that should invoke auto-filling.
951;; SPACE and NEWLINE are already set.
952
;; Mark each of these code point ranges (inclusive at both ends) in
;; `auto-fill-chars'.
(dolist (range '((#x3041 . #x30FF)
                 (#x3400 . #x4DB5)
                 (#x4E00 . #x9FBB)
                 (#xF900 . #xFAFF)
                 (#xFF00 . #xFF9F)
                 (#x20000 . #x2FFFF)))
  (set-char-table-range auto-fill-chars range t))
959
960
961;;; Setting char-width-table.  The default is 1.
962
963;; 0: non-spacing, enclosing combining, formatting, Hangul Jamo medial
964;;    and final characters.
;; Every (FROM . TO) range below, inclusive at both ends, gets width 0
;; in `char-width-table'.  The ranges are applied in the order listed;
;; overlapping and repeated entries are kept exactly as they are.
(dolist (zero-width-range
         '((#x0300 . #x036F)
           (#x0483 . #x0489)
           (#x0591 . #x05BD)
           (#x05BF . #x05BF)
           (#x05C1 . #x05C2)
           (#x05C4 . #x05C5)
           (#x05C7 . #x05C7)
           (#x0600 . #x0605)
           (#x0610 . #x061C)
           (#x064B . #x065F)
           (#x0670 . #x0670)
           (#x06D6 . #x06E4)
           (#x06E7 . #x06E8)
           (#x06EA . #x06ED)
           (#x070F . #x070F)
           (#x0711 . #x0711)
           (#x0730 . #x074A)
           (#x07A6 . #x07B0)
           (#x07EB . #x07F3)
           (#x0816 . #x0823)
           (#x0825 . #x082D)
           (#x0859 . #x085B)
           (#x08D4 . #x0902)
           (#x093A . #x093A)
           (#x093C . #x093C)
           (#x0941 . #x0948)
           (#x094D . #x094D)
           (#x0951 . #x0957)
           (#x0962 . #x0963)
           (#x0981 . #x0981)
           (#x09BC . #x09BC)
           (#x09C1 . #x09C4)
           (#x09CD . #x09CD)
           (#x09E2 . #x09E3)
           (#x0A01 . #x0A02)
           (#x0A3C . #x0A3C)
           (#x0A41 . #x0A4D)
           (#x0A41 . #x0A42)
           (#x0A47 . #x0A48)
           (#x0A4B . #x0A4D)
           (#x0A51 . #x0A51)
           (#x0A70 . #x0A71)
           (#x0A75 . #x0A75)
           (#x0A81 . #x0A82)
           (#x0ABC . #x0ABC)
           (#x0AC1 . #x0AC8)
           (#x0ACD . #x0ACD)
           (#x0AE2 . #x0AE3)
           (#x0B01 . #x0B01)
           (#x0B3C . #x0B3C)
           (#x0B3F . #x0B3F)
           (#x0B41 . #x0B44)
           (#x0B4D . #x0B56)
           (#x0B62 . #x0B63)
           (#x0B82 . #x0B82)
           (#x0BC0 . #x0BC0)
           (#x0BCD . #x0BCD)
           (#x0C00 . #x0C00)
           (#x0C3E . #x0C40)
           (#x0C46 . #x0C56)
           (#x0C62 . #x0C63)
           (#x0C81 . #x0C81)
           (#x0CBC . #x0CBC)
           (#x0CCC . #x0CCD)
           (#x0CE2 . #x0CE3)
           (#x0D01 . #x0D01)
           (#x0D41 . #x0D44)
           (#x0D4D . #x0D4D)
           (#x0D62 . #x0D63)
           (#x0D81 . #x0D81)
           (#x0DCA . #x0DCA)
           (#x0DD2 . #x0DD6)
           (#x0E31 . #x0E31)
           (#x0E34 . #x0E3A)
           (#x0E47 . #x0E4E)
           (#x0EB1 . #x0EB1)
           (#x0EB4 . #x0EBC)
           (#x0EC8 . #x0ECD)
           (#x0F18 . #x0F19)
           (#x0F35 . #x0F35)
           (#x0F37 . #x0F37)
           (#x0F39 . #x0F39)
           (#x0F71 . #x0F7E)
           (#x0F80 . #x0F84)
           (#x0F86 . #x0F87)
           (#x0F8D . #x0FBC)
           (#x0FC6 . #x0FC6)
           (#x102D . #x1030)
           (#x1032 . #x1037)
           (#x1039 . #x103A)
           (#x103D . #x103E)
           (#x1058 . #x1059)
           (#x105E . #x1060)
           (#x1071 . #x1074)
           (#x1082 . #x1082)
           (#x1085 . #x1086)
           (#x108D . #x108D)
           (#x109D . #x109D)
           (#x1160 . #x11FF)
           (#x135D . #x135F)
           (#x1712 . #x1714)
           (#x1732 . #x1734)
           (#x1752 . #x1753)
           (#x1772 . #x1773)
           (#x17B4 . #x17B5)
           (#x17B7 . #x17BD)
           (#x17C6 . #x17C6)
           (#x17C9 . #x17D3)
           (#x17DD . #x17DD)
           (#x180B . #x180E)
           (#x18A9 . #x18A9)
           (#x1885 . #x1886)
           (#x18A9 . #x18A9)
           (#x1920 . #x1922)
           (#x1927 . #x1928)
           (#x1932 . #x1932)
           (#x1939 . #x193B)
           (#x1A17 . #x1A18)
           (#x1A1B . #x1A1B)
           (#x1A56 . #x1A56)
           (#x1A58 . #x1A5E)
           (#x1A60 . #x1A60)
           (#x1A62 . #x1A62)
           (#x1A65 . #x1A6C)
           (#x1A73 . #x1A7C)
           (#x1A7F . #x1A7F)
           (#x1AB0 . #x1AC0)
           (#x1B00 . #x1B03)
           (#x1B34 . #x1B34)
           (#x1B36 . #x1B3A)
           (#x1B3C . #x1B3C)
           (#x1B42 . #x1B42)
           (#x1B6B . #x1B73)
           (#x1B80 . #x1B81)
           (#x1BA2 . #x1BA5)
           (#x1BA8 . #x1BA9)
           (#x1BAB . #x1BAD)
           (#x1BE6 . #x1BE6)
           (#x1BE8 . #x1BE9)
           (#x1BED . #x1BED)
           (#x1BEF . #x1BF1)
           (#x1C2C . #x1C33)
           (#x1C36 . #x1C37)
           (#x1CD0 . #x1CD2)
           (#x1CD4 . #x1CE0)
           (#x1CE2 . #x1CE8)
           (#x1CED . #x1CED)
           (#x1CF4 . #x1CF4)
           (#x1CF8 . #x1CF9)
           (#x1DC0 . #x1DFF)
           (#x200B . #x200F)
           (#x202A . #x202E)
           (#x2060 . #x206F)
           (#x20D0 . #x20F0)
           (#x2CEF . #x2CF1)
           (#x2D7F . #x2D7F)
           (#x2DE0 . #x2DFF)
           (#xA66F . #xA672)
           (#xA674 . #xA69F)
           (#xA6F0 . #xA6F1)
           (#xA802 . #xA802)
           (#xA806 . #xA806)
           (#xA80B . #xA80B)
           (#xA825 . #xA826)
           (#xA82C . #xA82C)
           (#xA8C4 . #xA8C5)
           (#xA8E0 . #xA8F1)
           (#xA926 . #xA92D)
           (#xA947 . #xA951)
           (#xA980 . #xA9B3)
           (#xA9B6 . #xA9B9)
           (#xA9BC . #xA9BC)
           (#xA9E5 . #xA9E5)
           (#xAA29 . #xAA2E)
           (#xAA31 . #xAA32)
           (#xAA35 . #xAA36)
           (#xAA43 . #xAA43)
           (#xAA4C . #xAA4C)
           (#xAA7C . #xAA7C)
           (#xAAB0 . #xAAB0)
           (#xAAB2 . #xAAB4)
           (#xAAB7 . #xAAB8)
           (#xAABE . #xAABF)
           (#xAAC1 . #xAAC1)
           (#xAAEC . #xAAED)
           (#xAAF6 . #xAAF6)
           (#xABE5 . #xABE5)
           (#xABE8 . #xABE8)
           (#xABED . #xABED)
           (#xD7B0 . #xD7FB)
           (#xFB1E . #xFB1E)
           (#xFE00 . #xFE0F)
           (#xFE20 . #xFE2F)
           (#xFEFF . #xFEFF)
           (#xFFF9 . #xFFFB)
           (#x101FD . #x101FD)
           (#x102E0 . #x102E0)
           (#x10376 . #x1037A)
           (#x10A01 . #x10A0F)
           (#x10A38 . #x10A3F)
           (#x10AE5 . #x10AE6)
           (#x10EAB . #x10EAC)
           (#x11001 . #x11001)
           (#x11038 . #x11046)
           (#x1107F . #x11081)
           (#x110B3 . #x110B6)
           (#x110B9 . #x110BA)
           (#x110BD . #x110BD)
           (#x11100 . #x11102)
           (#x11127 . #x1112B)
           (#x1112D . #x11134)
           (#x11173 . #x11173)
           (#x11180 . #x11181)
           (#x111B6 . #x111BE)
           (#x111CA . #x111CC)
           (#x111CF . #x111CF)
           (#x1122F . #x11231)
           (#x11234 . #x11234)
           (#x11236 . #x11237)
           (#x1123E . #x1123E)
           (#x112DF . #x112DF)
           (#x112E3 . #x112EA)
           (#x11300 . #x11301)
           (#x1133C . #x1133C)
           (#x11340 . #x11340)
           (#x11366 . #x1136C)
           (#x11370 . #x11374)
           (#x11438 . #x1143F)
           (#x11442 . #x11444)
           (#x11446 . #x11446)
           (#x114B3 . #x114B8)
           (#x114BA . #x114C0)
           (#x114C2 . #x114C3)
           (#x115B2 . #x115B5)
           (#x115BC . #x115BD)
           (#x115BF . #x115C0)
           (#x115DC . #x115DD)
           (#x11633 . #x1163A)
           (#x1163D . #x1163D)
           (#x1163F . #x11640)
           (#x116AB . #x116AB)
           (#x116AD . #x116AD)
           (#x116B0 . #x116B5)
           (#x116B7 . #x116B7)
           (#x1171D . #x1171F)
           (#x11722 . #x11725)
           (#x11727 . #x1172B)
           (#x1193B . #x1193C)
           (#x1193E . #x1193E)
           (#x11943 . #x11943)
           (#x11C30 . #x11C36)
           (#x11C38 . #x11C3D)
           (#x11C92 . #x11CA7)
           (#x11CAA . #x11CB0)
           (#x11CB2 . #x11CB3)
           (#x11CB5 . #x11CB6)
           (#x16AF0 . #x16AF4)
           (#x16B30 . #x16B36)
           (#x16F8F . #x16F92)
           (#x16FE4 . #x16FE4)
           (#x1BC9D . #x1BC9E)
           (#x1BCA0 . #x1BCA3)
           (#x1D167 . #x1D169)
           (#x1D173 . #x1D182)
           (#x1D185 . #x1D18B)
           (#x1D1AA . #x1D1AD)
           (#x1D242 . #x1D244)
           (#x1DA00 . #x1DA36)
           (#x1DA3B . #x1DA6C)
           (#x1DA75 . #x1DA75)
           (#x1DA84 . #x1DA84)
           (#x1DA9B . #x1DA9F)
           (#x1DAA1 . #x1DAAF)
           (#x1E000 . #x1E006)
           (#x1E008 . #x1E018)
           (#x1E01B . #x1E021)
           (#x1E023 . #x1E024)
           (#x1E026 . #x1E02A)
           (#x1E8D0 . #x1E8D6)
           (#x1E944 . #x1E94A)
           (#xE0001 . #xE01EF)))
  (set-char-table-range char-width-table zero-width-range 0))
1248
1249;; 2: East Asian Wide and Full-width characters.
;; Every (FROM . TO) range below, inclusive at both ends, gets width 2
;; in `char-width-table'.  The ranges are applied in the order listed.
(mapc
 (lambda (wide-range)
   (set-char-table-range char-width-table wide-range 2))
 '((#x1100 . #x115F)
   (#x231A . #x231B)
   (#x2329 . #x232A)
   (#x23E9 . #x23EC)
   (#x23F0 . #x23F0)
   (#x23F3 . #x23F3)
   (#x25FD . #x25FE)
   (#x2614 . #x2615)
   (#x2648 . #x2653)
   (#x267F . #x267F)
   (#x2693 . #x2693)
   (#x26A1 . #x26A1)
   (#x26AA . #x26AB)
   (#x26BD . #x26BE)
   (#x26C4 . #x26C5)
   (#x26CE . #x26CE)
   (#x26D4 . #x26D4)
   (#x26EA . #x26EA)
   (#x26F2 . #x26F3)
   (#x26F5 . #x26F5)
   (#x26FA . #x26FA)
   (#x26FD . #x26FD)
   (#x2705 . #x2705)
   (#x270A . #x270B)
   (#x2728 . #x2728)
   (#x274C . #x274C)
   (#x274E . #x274E)
   (#x2753 . #x2755)
   (#x2757 . #x2757)
   (#x2795 . #x2797)
   (#x27B0 . #x27B0)
   (#x27BF . #x27BF)
   (#x2B1B . #x2B1C)
   (#x2B50 . #x2B50)
   (#x2B55 . #x2B55)
   (#x2E80 . #x303E)
   (#x3040 . #x3247)
   (#x3250 . #x4DBF)
   (#x4E00 . #x9FFF)
   (#xA490 . #xA4C6)
   (#xA960 . #xA97F)
   (#xAC00 . #xD7A3)
   (#xF900 . #xFAFF)
   (#xFE10 . #xFE19)
   (#xFE30 . #xFE6F)
   (#xFF01 . #xFF60)
   (#xFFE0 . #xFFE6)
   (#x16FE0 . #x16FE4)
   (#x16FF0 . #x16FF1)
   (#x17000 . #x187F7)
   (#x18800 . #x18AFF)
   (#x18B00 . #x18CD5)
   (#x1AFF0 . #x1AFFF)
   (#x1B000 . #x1B152)
   (#x1B164 . #x1B167)
   (#x1B170 . #x1B2FB)
   (#x1F004 . #x1F004)
   (#x1F0CF . #x1F0CF)
   (#x1F18E . #x1F18E)
   (#x1F191 . #x1F19A)
   (#x1F1AD . #x1F1AD)
   (#x1F200 . #x1F320)
   (#x1F32D . #x1F335)
   (#x1F337 . #x1F37C)
   (#x1F37E . #x1F393)
   (#x1F3A0 . #x1F3CA)
   (#x1F3CF . #x1F3D3)
   (#x1F3E0 . #x1F3F0)
   (#x1F3F4 . #x1F3F4)
   (#x1F3F8 . #x1F3FA)
   (#x1F3FB . #x1F3FF)
   (#x1F400 . #x1F43E)
   (#x1F440 . #x1F440)
   (#x1F442 . #x1F4FC)
   (#x1F4FF . #x1F53D)
   (#x1F54B . #x1F54E)
   (#x1F550 . #x1F567)
   (#x1F57A . #x1F57A)
   (#x1F595 . #x1F596)
   (#x1F5A4 . #x1F5A4)
   (#x1F5FB . #x1F5FF)
   (#x1F600 . #x1F64F)
   (#x1F680 . #x1F6C5)
   (#x1F6CC . #x1F6CC)
   (#x1F6D0 . #x1F6D2)
   (#x1F6D5 . #x1F6D7)
   (#x1F6DD . #x1F6DF)
   (#x1F6EB . #x1F6EC)
   (#x1F6F4 . #x1F6FC)
   (#x1F7E0 . #x1F7F0)
   (#x1F90C . #x1F93A)
   (#x1F93C . #x1F945)
   (#x1F947 . #x1F9FF)
   (#x1FA00 . #x1FA53)
   (#x1FA60 . #x1FA6D)
   (#x1FA70 . #x1FA74)
   (#x1FA78 . #x1FA7C)
   (#x1FA80 . #x1FA86)
   (#x1FA90 . #x1FAAC)
   (#x1FAB0 . #x1FABA)
   (#x1FAC0 . #x1FAC5)
   (#x1FAD0 . #x1FAD9)
   (#x1FAE0 . #x1FAE7)
   (#x1FAF0 . #x1FAF6)
   (#x1FB00 . #x1FB92)
   (#x20000 . #x2FFFF)
   (#x30000 . #x3FFFF)))
1359
1360;; Other double width
1361;;(map-charset-chars
1362;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
1363;; 'ethiopic)
1364;; (map-charset-chars
1365;;  (lambda (range ignore) (set-char-table-range char-width-table range 2))
1366;; 'tibetan)
;; Characters of these charsets get width 2 in `char-width-table'.
(dolist (two-column-charset '(indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (chars _arg)
     (set-char-table-range char-width-table chars 2))
   two-column-charset))
1373
(defvar cjk-char-width-table-list
  '((ja_JP nil
           (japanese-jisx0208 (#x2121 . #x287E))
           (cp932-2-byte (#x8140 . #x879F)))
    (zh_CN nil
           (chinese-gb2312 (#x2121 . #x297E)))
    (zh_HK nil
           (big5-hkscs (#xA140 . #xA3FE) (#xC6A0 . #xC8FE)))
    (zh_TW nil
           (big5 (#xA140 . #xA3FE))
           (chinese-cns11643-1 (#x2121 . #x427E)))
    (ko_KR nil
           (korean-ksc5601 (#x2121 . #x2C7E))))
  "Internal use only.
Alist of locale symbol vs charsets.  In a language environment
corresponding to the locale, width of characters in the charsets is
set to 2.  Each element has the form:
  (LOCALE TABLE (CHARSET (FROM-CODE . TO-CODE) ...) ...)
LOCALE: locale symbol
TABLE: char-table used for `char-width-table', initially nil.
CHARSET: character set
FROM-CODE, TO-CODE: range of code-points in CHARSET")
1392
(defun use-cjk-char-width-table (locale-name)
  "Internal use only.
Setup `char-width-table' appropriate for a language environment
corresponding to LOCALE-NAME (symbol).
LOCALE-NAME must have an entry in `cjk-char-width-table-list';
otherwise an error is signaled.  The char-table for a locale is built
the first time it is requested and then stored in that entry."
  ;; Start from the table that has no parent.
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent)))
  (let ((entry (assq locale-name cjk-char-width-table-list)))
    (unless entry
      (error "Unknown locale for CJK language environment: %s"
             locale-name))
    (if (null (cadr entry))
        ;; No table stored yet for this locale: build one that marks
        ;; the listed code ranges as width 2 and inherits everything
        ;; else from the parentless table.
        (let ((wide (make-char-table nil)))
          (dolist (spec (cddr entry))
            (dolist (codes (cdr spec))
              (map-charset-chars (lambda (range _arg)
                                   (set-char-table-range wide range 2))
                                 (car spec) nil
                                 (car codes) (cdr codes))))
          (optimize-char-table wide)
          (set-char-table-parent wide char-width-table)
          (setcar (cdr entry) wide)))
    (setq char-width-table (cadr entry))))
1416
(defun use-default-char-width-table ()
  "Internal use only.
Setup `char-width-table' appropriate for non-CJK language environment.
This replaces `char-width-table' by its parent repeatedly, until it is
a table that has no parent."
  (let (parent)
    (while (setq parent (char-table-parent char-width-table))
      (setq char-width-table parent))))
1422
;; Optimize the standard case table, then the standard syntax table.
(dolist (table (list (standard-case-table) (standard-syntax-table)))
  (optimize-char-table table))
1425
1426
1427;; Setting char-script-table.
(cond (dump-mode
       ;; While dumping, we can't use require, and international is not
       ;; in load-path.
       (load "international/charscript")
       (load "international/emoji-zwj"))
      (t
       (require 'charscript)
       (require 'emoji-zwj)))

;; Record `tibetan' as the script of the characters of charset `tibetan'.
(map-charset-chars
 (lambda (chars _arg)
   (set-char-table-range char-script-table chars 'tibetan))
 'tibetan)
1442
1443
1444;;; Setting unicode-category-table.
1445
(setq unicode-category-table
      (unicode-property-table-internal 'general-category))

;; Derive two category entries from the general category of each
;; character (or character range) KEY:
;;   `.' when the category name starts with neither `M' nor `C';
;;   `^' when the category is `Mn'.
(when unicode-category-table
  (map-char-table
   (lambda (key category)
     (when category
       (let ((major (aref (symbol-name category) 0)))
         (cond ((or (not (memq major '(?M ?C)))
                    (eq category 'Zs))
                (modify-category-entry key ?.))
               ((eq category 'Mn)
                (modify-category-entry key ?^))))))
   unicode-category-table))

(optimize-char-table (standard-category-table))
1459
1460
1461;; Display of glyphless characters.
1462
(defvar char-acronym-table
  (make-char-table 'char-acronym-table)
  "Char table of acronyms for non-graphic characters.
The value for a character is its acronym (a string), or nil if no
acronym is recorded for it.")
1466
;; Acronyms of the C0 control characters U+0000..U+001F, in code
;; point order.  The two nil entries are TAB (U+0009) and LF (U+000A),
;; which get no acronym.  U+001C is FS (File Separator), listed
;; between ESC and GS.
(let ((code 0))
  (dolist (acronym '("NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" "BEL"
                     "BS"   nil   nil  "VT"  "FF"  "CR"  "SO"  "SI"
                     "DLE" "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB"
                     "CAN" "EM"  "SUB" "ESC" "FS"  "GS"  "RS"  "US"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))
1474
;; Acronyms of the C1 control characters U+0080..U+009F, in code
;; point order.  Note the letter (not digit) spellings: U+008D is RI
;; (Reverse Line Feed), U+008F is SS3 (Single Shift Three), and
;; U+009A is SCI (Single Character Introducer).
(let ((code #x0080))
  (dolist (acronym '("PAD" "HOP" "BPH" "NBH" "IND" "NEL" "SSA" "ESA"
                     "HTS" "HTJ" "VTS" "PLD" "PLU" "RI"  "SS2" "SS3"
                     "DCS" "PU1" "PU2" "STS" "CCH" "MW"  "SPA" "EPA"
                     "SOS" "SGCI" "SCI" "CSI" "ST"  "OSC" "PM"  "APC"))
    (aset char-acronym-table code acronym)
    (setq code (1+ code))))
1482
;; Acronyms of individual characters outside the C0 and C1 ranges.
(aset char-acronym-table #x17B4 "KIVAQ")   ; KHMER VOWEL INHERENT AQ
(aset char-acronym-table #x17B5 "KIVAA")   ; KHMER VOWEL INHERENT AA
(aset char-acronym-table #x200B "ZWSP")    ; ZERO WIDTH SPACE
(aset char-acronym-table #x200C "ZWNJ")    ; ZERO WIDTH NON-JOINER
(aset char-acronym-table #x200D "ZWJ")	   ; ZERO WIDTH JOINER
(aset char-acronym-table #x200E "LRM")	   ; LEFT-TO-RIGHT MARK
(aset char-acronym-table #x200F "RLM")	   ; RIGHT-TO-LEFT MARK
(aset char-acronym-table #x202A "LRE")	   ; LEFT-TO-RIGHT EMBEDDING
(aset char-acronym-table #x202B "RLE")	   ; RIGHT-TO-LEFT EMBEDDING
(aset char-acronym-table #x202C "PDF")	   ; POP DIRECTIONAL FORMATTING
(aset char-acronym-table #x202D "LRO")	   ; LEFT-TO-RIGHT OVERRIDE
(aset char-acronym-table #x202E "RLO")	   ; RIGHT-TO-LEFT OVERRIDE
(aset char-acronym-table #x2060 "WJ")	   ; WORD JOINER
(aset char-acronym-table #x2066 "LRI")	   ; LEFT-TO-RIGHT ISOLATE
(aset char-acronym-table #x2067 "RLI")	   ; RIGHT-TO-LEFT ISOLATE
;; U+2068 is one of `glyphless--bidi-control-characters'; without an
;; entry here the `acronym' display method would fall back to "UNK".
(aset char-acronym-table #x2068 "FSI")	   ; FIRST STRONG ISOLATE
(aset char-acronym-table #x2069 "PDI")	   ; POP DIRECTIONAL ISOLATE
(aset char-acronym-table #x206A "ISS")	   ; INHIBIT SYMMETRIC SWAPPING
(aset char-acronym-table #x206B "ASS")	   ; ACTIVATE SYMMETRIC SWAPPING
(aset char-acronym-table #x206C "IAFS")    ; INHIBIT ARABIC FORM SHAPING
(aset char-acronym-table #x206D "AAFS")    ; ACTIVATE ARABIC FORM SHAPING
(aset char-acronym-table #x206E "NADS")    ; NATIONAL DIGIT SHAPES
(aset char-acronym-table #x206F "NODS")    ; NOMINAL DIGIT SHAPES
(aset char-acronym-table #xFEFF "ZWNBSP")  ; ZERO WIDTH NO-BREAK SPACE
(aset char-acronym-table #xFFF9 "IAA")	   ; INTERLINEAR ANNOTATION ANCHOR
(aset char-acronym-table #xFFFA "IAS")     ; INTERLINEAR ANNOTATION SEPARATOR
(aset char-acronym-table #xFFFB "IAT")     ; INTERLINEAR ANNOTATION TERMINATOR
(aset char-acronym-table #x1D173 "BEGBM")  ; MUSICAL SYMBOL BEGIN BEAM
(aset char-acronym-table #x1D174 "ENDBM")  ; MUSICAL SYMBOL END BEAM
(aset char-acronym-table #x1D175 "BEGTIE") ; MUSICAL SYMBOL BEGIN TIE
(aset char-acronym-table #x1D176 "ENDTIE") ; MUSICAL SYMBOL END TIE
(aset char-acronym-table #x1D177 "BEGSLR") ; MUSICAL SYMBOL BEGIN SLUR
(aset char-acronym-table #x1D178 "ENDSLR") ; MUSICAL SYMBOL END SLUR
(aset char-acronym-table #x1D179 "BEGPHR") ; MUSICAL SYMBOL BEGIN PHRASE
(aset char-acronym-table #x1D17A "ENDPHR") ; MUSICAL SYMBOL END PHRASE
(aset char-acronym-table #xE0001 "|->TAG") ; LANGUAGE TAG
(aset char-acronym-table #xE0020 "SP TAG") ; TAG SPACE
;; Tag characters U+E0021..U+E007E are labelled with the ASCII
;; character (33..126) at the same offset.
(dotimes (i 94)
  (aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
(aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG
1522
1523;; We can't use the \N{name} things here, because this file is used
1524;; too early in the build process.
(defvar glyphless--bidi-control-characters
  '(;; Embeddings and overrides.
    #x202A                           ; ?\N{left-to-right embedding}
    #x202B                           ; ?\N{right-to-left embedding}
    #x202D                           ; ?\N{left-to-right override}
    #x202E                           ; ?\N{right-to-left override}
    ;; Isolates.
    #x2066                           ; ?\N{left-to-right isolate}
    #x2067                           ; ?\N{right-to-left isolate}
    #x2068                           ; ?\N{first strong isolate}
    ;; Terminators.
    #x202C                           ; ?\N{pop directional formatting}
    #x2069)                          ; ?\N{pop directional isolate}
  "List of the characters handled by the `bidi-control' group.
See `update-glyphless-char-display', which consults this list.")
1535
(defun update-glyphless-char-display (&optional variable value)
  "Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display',
and is intended to be used in the `:set' attribute of the
option `glyphless-char-display-control'.

If VARIABLE is non-nil, VALUE is first stored as its default value.
VALUE is a list of (GROUP . METHOD) directives; see the doc string
of `glyphless-char-display-control' for their meaning.  An error is
signaled for a METHOD or GROUP that is not recognized."
  (if variable
      (set-default variable value))
  (dolist (directive value)
    (let ((group (car directive))
          (method (cdr directive)))
      ;; NOTE: `bidi-control' is accepted here although it is not one
      ;; of the methods described by `glyphless-char-display-control'.
      (or (memq method '( zero-width thin-space empty-box
                          acronym hex-code bidi-control))
          (error "Invalid glyphless character display method: %s" method))
      (cond
       ((eq group 'c0-control)
        (glyphless-set-char-table-range glyphless-char-display
                                        #x00 #x1F method)
        ;; Users will not expect their newlines and TABs be
        ;; displayed as anything but themselves, so exempt those
        ;; two characters from c0-control.
        (set-char-table-range glyphless-char-display #x9 nil)
        (set-char-table-range glyphless-char-display #xa nil))
       ((eq group 'c1-control)
        (glyphless-set-char-table-range glyphless-char-display
                                        #x80 #x9F method))
       ((eq group 'variation-selectors)
        (glyphless-set-char-table-range glyphless-char-display
                                        #xFE00 #xFE0F method))
       ((memq group '(format-control bidi-control))
        (when unicode-category-table
          (map-char-table
           (lambda (key category)
             (when (eq category 'Cf)
               ;; KEY is either one character or a (FROM . TO) range.
               (let ((display method)
                     (code (if (consp key) (car key) key))
                     (last (if (consp key) (cdr key) key)))
                 (while (<= code last)
                   ;; U+00AD is never touched.
                   (unless (= code #xAD)
                     (if (eq method 'acronym)
                         (setq display
                               (or (aref char-acronym-table code)
                                   "UNK")))
                     ;; `format-control' covers every Cf character;
                     ;; `bidi-control' only the listed ones.
                     (if (or (eq group 'format-control)
                             (memq code
                                   glyphless--bidi-control-characters))
                         (set-char-table-range glyphless-char-display
                                               code display)))
                   (setq code (1+ code))))))
           unicode-category-table)))
       ((eq group 'no-font)
        (set-char-table-extra-slot glyphless-char-display 0 method))
       (t
        (error "Invalid glyphless character group: %s" group))))))
1591
(defun glyphless-set-char-table-range (chartable from to method)
  "In CHARTABLE, set the entries of characters FROM through TO for METHOD.
If METHOD is the symbol `acronym', each character gets its own entry
of `char-acronym-table' (possibly nil).  For any other METHOD, the
whole range FROM..TO is set to METHOD itself."
  (cond ((not (eq method 'acronym))
         (set-char-table-range chartable (cons from to) method))
        (t
         (let ((code from))
           (while (<= code to)
             (set-char-table-range chartable code
                                   (aref char-acronym-table code))
             (setq code (1+ code)))))))
1599
1600;;; Control of displaying glyphless characters.
(define-widget 'glyphless-char-display-method 'lazy
  "Display method for glyphless characters."
  :type '(choice (const :tag "Don't display" zero-width)
                 (const :tag "Display as thin space" thin-space)
                 (const :tag "Display as empty box" empty-box)
                 (const :tag "Display acronym" acronym)
                 (const :tag "Display hex code in a box" hex-code))
  :value 'thin-space
  :format "%v"
  :group 'mule)
1613
(defcustom glyphless-char-display-control
  '((format-control . thin-space)
    (variation-selectors . thin-space)
    (no-font . hex-code))
  "List of directives to control display of glyphless characters.

Each element has the form (GROUP . METHOD), where GROUP is a
symbol specifying the character group, and METHOD is a symbol
specifying the method of displaying characters belonging to that
group.

GROUP must be one of these symbols:
  `c0-control':     U+0000..U+001F, but excluding newline and TAB.
  `c1-control':     U+0080..U+009F.
  `format-control': Characters of Unicode General Category `Cf',
                    such as U+200C (ZWNJ), U+200E (LRM), but
                    excluding characters that have graphic images,
                    such as U+00AD (SHY).
  `bidi-control':   A subset of `format-control', but only characters
                    that are relevant for bidirectional formatting control,
                    like U+2069 (PDI) and U+202B (RLE).
  `variation-selectors':
                    Characters in the range U+FE00..U+FE0F, used for
                    selecting alternate glyph presentations, such as
                    Emoji vs Text presentation, of the preceding
                    character(s).
  `no-font':        For GUI frames, characters for which no suitable
                    font is found; for text-mode frames, characters
                    that cannot be encoded by `terminal-coding-system'.

METHOD must be one of these symbols:
  `zero-width': don't display.
  `thin-space': display a thin (1-pixel width) space.  On character
                terminals, display as 1-character space.
  `empty-box':  display an empty box.
  `acronym':    display an acronym of the character in a box.  The
                acronym is taken from `char-acronym-table', which see.
  `hex-code':   display the hexadecimal character code in a box.

Do not set its value directly from Lisp; the value takes effect
only via a custom `:set' function (`update-glyphless-char-display'),
which updates `glyphless-char-display'."
  :group 'display
  :version "28.1"
  :set #'update-glyphless-char-display
  :type '(alist :key-type (symbol :tag "Character Group")
                :value-type (symbol :tag "Display Method"))
  :options '((c0-control glyphless-char-display-method)
             (c1-control glyphless-char-display-method)
             (format-control glyphless-char-display-method)
             (bidi-control glyphless-char-display-method)
             (variation-selectors glyphless-char-display-method)
             (no-font (glyphless-char-display-method :value hex-code))))
1668
1669
1670;;; Setting word boundary.
1671
(setq word-combining-categories
      '((nil . ?^) (?^ . nil) (?C . ?H) (?C . ?K))
      ;; (2-byte character sets)
      word-separating-categories
      '((?H . ?K)))                     ; Hiragana - Katakana
1681
1682;; Local Variables:
1683;; coding: utf-8
1684;; End:
1685
1686;;; characters.el ends here
1687