1;;; mule-conf.el --- configure multilingual environment  -*- lexical-binding: t; -*-
2
3;; Copyright (C) 1997-2021 Free Software Foundation, Inc.
4;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5;;   National Institute of Advanced Industrial Science and Technology (AIST)
6;;   Registration Number H14PRO021
7;; Copyright (C) 2003
8;;   National Institute of Advanced Industrial Science and Technology (AIST)
9;;   Registration Number H13PRO009
10
11;; Keywords: i18n, mule, multilingual, character set, coding system
12
13;; This file is part of GNU Emacs.
14
15;; GNU Emacs is free software: you can redistribute it and/or modify
16;; it under the terms of the GNU General Public License as published by
17;; the Free Software Foundation, either version 3 of the License, or
18;; (at your option) any later version.
19
20;; GNU Emacs is distributed in the hope that it will be useful,
21;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23;; GNU General Public License for more details.
24
25;; You should have received a copy of the GNU General Public License
26;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
27
28;;; Commentary:
29
30;; This file defines the Emacs charsets and some basic coding systems.
31;; Other coding systems are defined in the files in directory
32;; lisp/language.
33
34;;; Code:
35
36;;; Remarks
37
38;; The ISO-IR registry is maintained by the Information Processing
39;; Society of Japan/Information Technology Standards Commission of
40;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/.
41;; Standards docs equivalent to iso-2022 and iso-8859 are at
42;; https://www.ecma.ch/.
43
44;; FWIW, https://www.microsoft.com/globaldev/ lists the following for
45;; MS Windows, which are presumably the only charsets we really need
46;; to worry about on such systems:
47;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
48;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
49;;                      1258, 874, 932, 936, 949, 950
50
51;;; Definitions of character sets.
52
;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
;; defined in charset.c as below:
55;;
56;; (define-charset 'ascii
57;;   ""
58;;   :dimension 1
59;;   :code-space [0 127]
60;;   :iso-final-char ?B
61;;   :ascii-compatible-p t
62;;   :emacs-mule-id 0
63;;   :code-offset 0)
64;;
65;; (define-charset 'unicode
66;;   ""
67;;   :dimension 3
68;;   :code-space [0 255 0 255 0 16]
69;;   :ascii-compatible-p t
70;;   :code-offset 0)
71;;
72;; (define-charset 'emacs
73;;   ""
74;;   :dimension 3
75;;   :code-space [0 255 0 255 0 63]
76;;   :ascii-compatible-p t
77;;   :supplementary-p t
78;;   :code-offset 0)
79;;
80;; (define-charset 'eight-bit
81;;   ""
82;;   :dimension 1
83;;   :code-space [128 255]
84;;   :code-offset #x3FFF80)
85;;
86;; We now set :docstring, :short-name, and :long-name properties.
87
;; Set the descriptive properties of the predefined charsets.
;; Each row is (CHARSET DOCSTRING SHORT-NAME LONG-NAME); a nil entry
;; means that the corresponding property is not set here.
(dolist (row '((ascii "ASCII (ISO646 IRV)" "ASCII" "ASCII (ISO646 IRV)")
               (iso-8859-1 "Latin-1 (ISO/IEC 8859-1)" "Latin-1" "Latin-1")
               (unicode "Unicode (ISO10646)" "Unicode" "Unicode (ISO10646)")
               (emacs "Full Emacs charset (excluding eight bit chars)"
                      "Emacs" "Emacs")
               (eight-bit "Raw bytes 128-255" "Raw bytes" nil)))
  (let ((charset (car row))
        (texts (cdr row)))
    (dolist (prop '(:docstring :short-name :long-name))
      (when (car texts)
        (put-charset-property charset prop (car texts)))
      (setq texts (cdr texts)))))

;; `ucs' is another name for `unicode'.
(define-charset-alias 'ucs 'unicode)
117
;; Codes 32..127 of this charset stand for the characters 160..255.
(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :iso-final-char ?A
  :emacs-mule-id 129
  :code-space [32 127]
  :code-offset #xA0)

;; Name perhaps not ideal, but is XEmacs-compatible.
(define-charset 'control-1
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-space [#x80 #x9F]
  :code-offset #x80)

;; The two raw-byte charsets below split the byte range 0x80..0xFF
;; between them; their code offsets are consecutive.
(define-charset 'eight-bit-control
  "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  :short-name "Raw bytes 0x80..0x9F"
  :supplementary-p t
  :code-space [#x80 #x9F]
  :code-offset #x3FFF80)                ; see character.h

(define-charset 'eight-bit-graphic
  "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  :short-name "Raw bytes 0xA0..0xFF"
  :supplementary-p t
  :code-space [#xA0 #xFF]
  :code-offset #x3FFFA0)                ; see character.h
147
(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define 8-bit charset SYMBOL and optionally its right-hand part ISO-SYMBOL.
SYMBOL gets the code space 0..255 and the :map value MAP; NAME serves as
its docstring and long name, NICKNAME as its short name.
If ISO-SYMBOL is non-nil, it is defined too, with code space 32..127, as
the subset (SYMBOL 160 255 -128), using ISO-FINAL as :iso-final-char and
EMACS-MULE-ID as :emacs-mule-id.  A non-nil ISO-IR is appended to the
docstring of ISO-SYMBOL as \": ISO-IR-<number>\".
Every argument is a form that is evaluated when the expansion runs."
  (declare (indent defun))
  `(progn
     ;; The complete 8-bit charset.
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     ;; Its upper half, re-based to the code space 32..127.
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (concat (format "Right-Hand Part of %s (%s)" ,name ,nickname)
                 (if ,iso-ir (format ": ISO-IR-%d" ,iso-ir) ""))
         :short-name (format "RHP of %s" ,name)
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :code-space [32 127]
         :subset (list ,symbol 160 255 -128)))))
172
;; The ISO/IEC 8859 family.  The arguments of each call are laid out as
;;   SYMBOL ISO-SYMBOL
;;   NAME NICKNAME
;;   ISO-IR ISO-FINAL EMACS-MULE-ID MAP

(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
  "ISO/IEC 8859/2" "Latin-2"
  101 ?B 130 "8859-2")

(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
  "ISO/IEC 8859/3" "Latin-3"
  109 ?C 131 "8859-3")

(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
  "ISO/IEC 8859/4" "Latin-4"
  110 ?D 132 "8859-4")

(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
  "ISO/IEC 8859/5" "Latin/Cyrillic"
  144 ?L 140 "8859-5")

(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
  "ISO/IEC 8859/6" "Latin/Arabic"
  127 ?G 135 "8859-6")

(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
  "ISO/IEC 8859/7" "Latin/Greek"
  126 ?F 134 "8859-7")

(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
  "ISO/IEC 8859/8" "Latin/Hebrew"
  138 ?H 136 "8859-8")

(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
  "ISO/IEC 8859/9" "Latin-5"
  148 ?M 141 "8859-9")

(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
  "ISO/IEC 8859/10" "Latin-6"
  157 ?V nil "8859-10")

;; http://www.nectec.or.th/it-standards/iso8859-11/
;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
;; plus nbsp
(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
  "ISO/IEC 8859/11" "Latin/Thai"
  166 ?T nil "8859-11")

;; 8859-12 doesn't (yet?) exist.

(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
  "ISO/IEC 8859/13" "Latin-7"
  179 ?Y nil "8859-13")

(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
  "ISO/IEC 8859/14" "Latin-8"
  199 ?_ 143 "8859-14")

(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
  "ISO/IEC 8859/15" "Latin-9"
  203 ?b 142 "8859-15")

(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
  "ISO/IEC 8859/16" "Latin-10"
  226 ?f nil "8859-16")

;; The macro has served its purpose; no point in keeping it around.
(fmakunbound 'define-iso-single-byte-charset)
222
;; Can this be shared with 8859-11?
;; N.b. not all of these are defined in Unicode.
(define-charset 'thai-tis620
  "MULE charset for TIS620.2533"
  :short-name "TIS620.2533"
  :iso-final-char ?T
  :emacs-mule-id 133
  :code-space [#x20 #x7F]
  :code-offset #x0E00)

;; The 8-bit form: `ascii' together with `thai-tis620' at offset 128.
(define-charset 'tis620-2533
  "TIS620.2533, a.k.a. TIS-620.  Like `thai-iso8859-11', but without NBSP."
  :short-name "TIS620.2533"
  :ascii-compatible-p t
  :code-space [0 255]
  :superset '(ascii (thai-tis620 . 128)))
239
;; The whole of JISX0201, taken from a map file.  The two charsets
;; that follow are subsets of it.
(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :code-space [0 #xDF]
  :map "JISX0201")

;; Codes 33..126 of `jisx0201', unshifted.
(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :iso-final-char ?J
  :emacs-mule-id 138
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 33 126 0))

;; Codes 161..254 of `jisx0201', shifted by -128 into 33..126.
(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :iso-final-char ?I
  :emacs-mule-id 137
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 161 254 -128))
265
;; Simplified Chinese.
(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :iso-final-char ?A
  :emacs-mule-id 145
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x110000
  :unify-map "GB2312")

(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :code-space [#x40 #xFE #x81 #xFE]
  :code-offset #x160000
  :unify-map "GBK")
;; Code-page names for `chinese-gbk'.
(define-charset-alias 'cp936 'chinese-gbk)
(define-charset-alias 'windows-936 'chinese-gbk)
284
;; CNS11643 planes 1 to 7.  Successive planes use the final characters
;; ?G to ?M, and their code offsets are #x4000 apart, starting at
;; #x114000.

(define-charset 'chinese-cns11643-1
  "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  :short-name "CNS11643-1"
  :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  :iso-final-char ?G
  :emacs-mule-id 149
  :code-space [33 126 33 126]
  :code-offset #x114000
  :unify-map "CNS-1")

(define-charset 'chinese-cns11643-2
  "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  :short-name "CNS11643-2"
  :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  :iso-final-char ?H
  :emacs-mule-id 150
  :code-space [33 126 33 126]
  :code-offset #x118000
  :unify-map "CNS-2")

(define-charset 'chinese-cns11643-3
  "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  :short-name "CNS11643-3"
  :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  :iso-final-char ?I
  :code-space [33 126 33 126]
  :emacs-mule-id 246
  :code-offset #x11C000
  :unify-map "CNS-3")

(define-charset 'chinese-cns11643-4
  "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
  :short-name "CNS11643-4"
  :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
  :iso-final-char ?J
  :emacs-mule-id 247
  :code-space [33 126 33 126]
  :code-offset #x120000
  :unify-map "CNS-4")

(define-charset 'chinese-cns11643-5
  "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
  :short-name "CNS11643-5"
  :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
  :iso-final-char ?K
  :emacs-mule-id 248
  :code-space [33 126 33 126]
  :code-offset #x124000
  :unify-map "CNS-5")

(define-charset 'chinese-cns11643-6
  "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
  :short-name "CNS11643-6"
  :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
  :iso-final-char ?L
  :emacs-mule-id 249
  :code-space [33 126 33 126]
  :code-offset #x128000
  :unify-map "CNS-6")

(define-charset 'chinese-cns11643-7
  "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
  :short-name "CNS11643-7"
  :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
  :iso-final-char ?M
  :emacs-mule-id 250
  :code-space [33 126 33 126]
  :code-offset #x12C000
  :unify-map "CNS-7")
354
;; Big5 with its native two-byte code space.
(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x130000
  :unify-map "BIG5")
;; Fixme: AKA cp950 according to
;; <URL:https://www.microsoft.com/globaldev/reference/WinCP.asp>.  Is
;; that correct?
364
;; The frequently used Big5 characters as a separate charset whose code
;; space is #x21..#x7E in both dimensions.  The range quoted in
;; :long-name must agree with the one in the docstring.
(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67E"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")
375
;; The less frequently used Big5 characters, as a separate charset with
;; the code space #x21..#x7E in both dimensions.
(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id 153
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x137800
  :unify-map "BIG5-2")
386
;; Japanese two-byte charsets.

(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :iso-final-char ?B
  :emacs-mule-id 146
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x140000
  :unify-map "JISX0208")

(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  :iso-final-char ?@
  :emacs-mule-id 144
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x144000
  :unify-map "JISC6226")

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :iso-final-char ?D
  :emacs-mule-id 148
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x148000
  :unify-map "JISX0212")

;; Note that jisx0213 contains characters not in Unicode (3.2?).  It's
;; arguable whether it should have a unify-map.
(define-charset 'japanese-jisx0213-1
  "JISX0213.2000 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :iso-final-char ?O
  :emacs-mule-id 151
  :unify-map "JISX2131"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x14C000)

(define-charset 'japanese-jisx0213-2
  "JISX0213.2000 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :iso-final-char ?P
  :emacs-mule-id 254
  :unify-map "JISX2132"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x150000)

;; The 2004 additions are kept in a charset of their own ...
(define-charset 'japanese-jisx0213-a
  "JISX0213.2004 adds these characters to JISX0213.2000."
  :short-name "JISX0213A"
  :dimension 2
  :code-space [#x21 #x7E #x21 #x7E]
  :supplementary-p t
  :map "JISX213A")

;; ... and the 2004 plane 1 is the superset of them and the 2000 plane 1.
(define-charset 'japanese-jisx0213.2004-1
  "JISX0213.2004 Plane1 (Japanese)"
  :short-name "JISX0213.2004-1"
  :dimension 2
  :code-space [#x21 #x7E #x21 #x7E]
  :iso-final-char ?Q
  :superset '(japanese-jisx0213-a japanese-jisx0213-1))
452
;; CP932 is assembled from `ascii' and the two helper charsets defined
;; first: a one-byte Katakana part and a two-byte part.

(define-charset 'katakana-sjis
  "Katakana part of Shift-JIS"
  :dimension 1
  :code-space [161 223]
  :subset '(jisx0201 #xA1 #xDF 0)
  :supplementary-p t)

(define-charset 'cp932-2-byte
  "2-byte part of CP932"
  :dimension 2
  :map "CP932-2BYTE"
  :code-space [#x40 #xFC #x81 #xFC]
  :supplementary-p t)

(define-charset 'cp932
  "CP932 (Microsoft shift-jis)"
  :code-space [#x00 #xFF #x00 #xFE]
  :short-name "CP932"
  :superset '(ascii katakana-sjis cp932-2-byte))
472
(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :iso-final-char ?C
  :emacs-mule-id 147
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x279f94                 ; ... #x27c217
  :unify-map "KSC5601")

;; NOTE(review): :short-name is the same as that of `big5' -- confirm
;; that this is intended.
(define-charset 'big5-hkscs
  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x27c218                 ; ... #x280839
  :unify-map "BIG5-HKSCS")

;; CP949 is the superset of `ascii' and the two-byte part defined first.
(define-charset 'cp949-2-byte
  "2-byte part of CP949"
  :dimension 2
  :map "CP949-2BYTE"
  :code-space [#x41 #xFE #x81 #xFD]
  :supplementary-p t)

(define-charset 'cp949
  "CP949 (Korean)"
  :short-name "CP949"
  :long-name "CP949 (Korean)"
  :code-space [#x00 #xFE #x00 #xFD]
  :superset '(ascii cp949-2-byte))
503
;; Tone-marked letters for PinYin/ZhuYin.
(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :iso-final-char ?0
  :emacs-mule-id 160
  :code-space [#x21 #x7E]
  :unify-map "MULE-sisheng"
  :supplementary-p t
  :code-offset #x200000)

;; A subset of the 1989 version of IPA.  It consists of the consonant
;; signs used in English, French, German and Italian, and all vowels
;; signs in the table.  [says old MULE doc]
(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :iso-final-char ?0
  :emacs-mule-id 161
  :unify-map "MULE-ipa"
  :code-space [#x20 #x7F]
  :supplementary-p t
  :code-offset #x200080)
527
;; Vietnamese: the 8-bit VISCII charset, plus separate charsets for its
;; lower-case and upper-case letters.

(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :code-space [0 255]
  :map "VISCII")

(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :iso-final-char ?1
  :emacs-mule-id 162
  :code-space [#x20 #x7F]
  :code-offset #x200200
  :supplementary-p t
  :unify-map "MULE-lviscii")

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :iso-final-char ?2
  :emacs-mule-id 163
  :code-space [#x20 #x7F]
  :code-offset #x200280
  :supplementary-p t
  :unify-map "MULE-uviscii")

;; The TCVN-5712 charsets, VN1 and VN2.
(define-charset 'vscii
  "VSCII1.1 (TCVN-5712 VN1)"
  :short-name "VSCII"
  :code-space [0 255]
  :map "VSCII")

(define-charset-alias 'tcvn-5712 'vscii)

;; Fixme: see note in tcvn.map about combining characters
(define-charset 'vscii-2
  "VSCII-2 (TCVN-5712 VN2)"
  :code-space [0 255]
  :map "VSCII-2")
570
;; 8-bit, ASCII-compatible charsets that are each described by a map
;; file.  Each row is (CHARSET DOCSTRING SHORT-NAME MAP ALIAS...).
(dolist (row '((koi8-r "KOI8-R" "KOI8-R" "KOI8-R" koi8)
               (alternativnyj "ALTERNATIVNYJ" "alternativnyj" "ALTERNATIVNYJ")
               (cp866 "CP866" "cp866" "IBM866" ibm866)
               (koi8-u "KOI8-U" "KOI8-U" "KOI8-U")
               (koi8-t "KOI8-T" "KOI8-T" "KOI8-T")
               (georgian-ps "GEORGIAN-PS" "GEORGIAN-PS" "KA-PS")
               (georgian-academy "GEORGIAN-ACADEMY" "GEORGIAN-ACADEMY"
                                 "KA-ACADEMY")))
  (let ((charset (car row)))
    (define-charset charset
      (nth 1 row)
      :short-name (nth 2 row)
      :ascii-compatible-p t
      :code-space (vector 0 255)
      :map (nth 3 row))
    ;; Aliases are registered right after the charset they name.
    (dolist (alias (nthcdr 4 row))
      (define-charset-alias alias charset))))
622
;; The Windows code pages 1250..1258.  Each row is (NUMBER DESCRIPTION).
;; For the number N this defines the charset `windows-N' from the map
;; file "CPN" and then makes `cpN' an alias for it.
(dolist (row '((1250 "Central Europe")
               (1251 "Cyrillic")
               (1252 "Latin I")
               (1253 "Greek")
               (1254 "Turkish")
               (1255 "Hebrew")
               (1256 "Arabic")
               (1257 "Baltic")
               (1258 "Viet Nam")))
  (let* ((number (car row))
         (title (format "WINDOWS-%d" number))
         (charset (intern (format "windows-%d" number))))
    (define-charset charset
      (format "%s (%s)" title (cadr row))
      :short-name title
      :ascii-compatible-p t
      :code-space (vector 0 255)
      :map (format "CP%d" number))
    (define-charset-alias (intern (format "cp%d" number)) charset)))
694
;; The NEXTSTEP charset.
(define-charset 'next
  "NEXT"
  :short-name "NEXT"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "NEXTSTEP")

;; CP1125 is known under two more names.
(define-charset 'cp1125
  "CP1125"
  :short-name "CP1125"
  :code-space [#x00 #xFF]
  :ascii-compatible-p t
  :map "CP1125")
(define-charset-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-charset-alias 'cp866u 'cp1125)
711
;; The DOS/IBM code pages.  Each row is (CHARSET DESCRIPTION MAP ALIAS...).
;; The short name is the upper-cased charset name, and the docstring is
;; that short name followed by DESCRIPTION in parentheses.
(dolist (row '(;; Fixme: C.f. iconv, https://czyborra.com/charsets/codepages.html
               ;; shows this as not ASCII compatible, with various graphics in
               ;; 0x01-0x1F.
               (cp437 "MS-DOS United States, Australia, New Zealand, South Africa"
                      "IBM437")
               (cp720 "Arabic" "CP720")
               (cp737 "PC Greek" "CP737")
               (cp775 "PC Baltic" "CP775")
               (cp851 "Greek" "IBM851")
               (cp852 "MS-DOS Latin-2" "IBM852")
               (cp855 "IBM Cyrillic" "IBM855")
               (cp857 "IBM Turkish" "IBM857")
               ;; IANA has IBM00858/CP00858
               (cp858 "Multilingual Latin I + Euro" "CP858" cp00858)
               (cp860 "MS-DOS Portuguese" "IBM860")
               (cp861 "MS-DOS Icelandic" "IBM861")
               (cp862 "PC Hebrew" "IBM862")
               (cp863 "MS-DOS Canadian French" "IBM863")
               (cp864 "PC Arabic" "IBM864")
               (cp865 "MS-DOS Nordic" "IBM865")
               (cp869 "IBM Modern Greek" "IBM869")
               (cp874 "IBM Thai" "IBM874")))
  (let* ((charset (car row))
         (title (upcase (symbol-name charset))))
    (define-charset charset
      (format "%s (%s)" title (nth 1 row))
      :short-name title
      :code-space (vector 0 255)
      :ascii-compatible-p t
      :map (nth 2 row))
    (dolist (alias (nthcdr 3 row))
      (define-charset-alias alias charset))))
834
;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
;; Others are of direction right-to-left and of width 1-column or
;; 2-column.

(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :iso-final-char ?2
  :emacs-mule-id 164
  :supplementary-p t
  :code-space [#x22 #x2A]
  :code-offset #x0600)

(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :iso-final-char ?3
  :emacs-mule-id 165
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x200100)

(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :iso-final-char ?4
  :emacs-mule-id 224
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x200180)
867
;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
;; Not all of them are defined in Unicode.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :iso-final-char ?1
  :emacs-mule-id 167
  :supplementary-p t
  :code-space [#x21 #x7E]
  :code-offset #x0E81)

;; The 8-bit form: `ascii', `eight-bit-control', and `lao' at offset 128.
(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :code-space [0 255]
  :supplementary-p t
  :superset '(ascii eight-bit-control (lao . 128)))
886
887
;; Indian scripts.  Symbolic charset for data exchange.  Glyphs are
;; not assigned.  They are automatically converted to each Indian
;; script which IS-13194 supports.

(define-charset 'indian-is13194
  "7-bit representation of IS 13194 (ISCII) for Devanagari"
  :short-name "IS 13194 (DEV)"
  :long-name "Indian IS 13194 (DEV)"
  :iso-final-char ?5
  :emacs-mule-id 225
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x180000
  :unify-map "MULE-is13194")

;; One 256-code charset per script and font family.  The charsets are
;; given consecutive blocks of #x100 codes starting at #x180100: first
;; all CDAC charsets, then all AKRUTI ones.
(let ((next-offset #x180100))
  (dolist (family '(("cdac" "CDAC"
                     (devanagari sanskrit bengali tamil telugu assamese
                                 oriya kannada malayalam gujarati punjabi))
                    ("akruti" "AKRUTI"
                     (devanagari bengali punjabi gujarati
                                 oriya tamil telugu kannada malayalam))))
    (let ((suffix (nth 0 family))
          (font (nth 1 family)))
      (dolist (script (nth 2 family))
        (let ((title (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script suffix))
            (format
             "Glyphs of %s script for %s font.  Subset of `indian-glyph'."
             title font)
            :short-name (format "%s %s glyphs" font title)
            :supplementary-p t
            :code-space [0 255]
            :code-offset next-offset))
        (setq next-offset (+ next-offset #x100))))))

;; Starts at the same code offset as the first per-script charset above.
(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :iso-final-char ?4
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [32 127 32 127]
  :code-offset #x180100)

;; Actual Glyph for 1-column width.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)

;; Actual Glyph for 2-column width.
;; NOTE(review): same :emacs-mule-id and :code-offset as
;; `indian-1-column' -- confirm that this is intended.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs."
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :iso-final-char ?5
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)
958
;; Tibetan, 2-column glyphs.  Each property is given exactly once.
(define-charset 'tibetan
  "Tibetan characters"
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :iso-final-char ?7
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)
970
;; Tibetan, 1-column glyphs.  The code space and the code offset are
;; the same as those of `tibetan'.
(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :iso-final-char ?8
  :emacs-mule-id 241
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x25]
  :code-offset #x190000)
980
;; Subsets of Unicode.
(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :iso-final-char ?2
  :emacs-mule-id 242
  :supplementary-p t
  :code-space [32 127 32 71]
  :code-offset #x2500)
991
;; The :long-name spells the range like the docstring does, in the form
;; "U+XXXX..U+YYYY" also used by the other mule-unicode-* charsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code 30015)                      ; U+FFFF
1002
(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :iso-final-char ?1
  :emacs-mule-id 244
  :supplementary-p t
  :code-space [32 127 32 127]
  :code-offset #x0100)

;; A whole Unicode plane: 256 x 256 codes starting at U+0000.
(define-charset 'unicode-bmp
  "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
  :short-name "Unicode BMP"
  :code-space [#x00 #xFF #x00 #xFF]
  :code-offset #x0000)
1018
;; Unicode plane 1.  The short name carries no trailing blank, like the
;; short names "Unicode BMP", "Unicode SIP" and "Unicode SSP".
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)
1024
;; Unicode plane 2.
(define-charset 'unicode-sip
  "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
  :short-name "Unicode SIP"
  :code-space [#x00 #xFF #x00 #xFF]
  :code-offset #x20000)

;; Unicode plane 14.
(define-charset 'unicode-ssp
  "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
  :short-name "Unicode SSP"
  :code-space [#x00 #xFF #x00 #xFF]
  :code-offset #xE0000)
1036
(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3
  :emacs-mule-id 245
  :supplementary-p t
  :unify-map "MULE-ethiopic"
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x1A0000)

;; 8-bit and ASCII-compatible, read from a map file.
(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :ascii-compatible-p t
  :code-space [#x00 #xFF]
  :map "MACINTOSH")
1054
;; Fixme: modern EBCDIC variants, e.g. IBM00924?

;; One-dimensional charset (code points 0..255) with MIME name
;; `ebcdic-us' and the mapping named "EBCDICUS".  Unlike the DOS/Mac
;; charsets in this file it is not flagged :ascii-compatible-p.
(define-charset 'ebcdic-us
  "US version of EBCDIC"
  :short-name "EBCDIC-US"
  :code-space [0 255]
  :mime-charset 'ebcdic-us
  :map "EBCDICUS")
1062
;; One-dimensional charset (code points 0..255) with MIME name
;; `ebcdic-uk' and the mapping named "EBCDICUK".
(define-charset 'ebcdic-uk
  "UK version of EBCDIC"
  :short-name "EBCDIC-UK"
  :code-space [0 255]
  :mime-charset 'ebcdic-uk
  :map "EBCDICUK")
1069
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm038' and the mapping named "IBM038".  Also reachable under the
;; alias names `ebcdic-int' and `cp038'.
(define-charset 'ibm038
  "International version of EBCDIC"
  :short-name "IBM038"
  :code-space [0 255]
  :mime-charset 'ibm038
  :map "IBM038")
(define-charset-alias 'ebcdic-int 'ibm038)
(define-charset-alias 'cp038 'ibm038)
1078
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm256' and the mapping named "IBM256".
(define-charset 'ibm256
  "Netherlands version of EBCDIC"
  :short-name "IBM256"
  :code-space [0 255]
  :mime-charset 'ibm256
  :map "IBM256")
1085
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm273' and the mapping named "IBM273".
(define-charset 'ibm273
  "Austrian / German version of EBCDIC"
  :short-name "IBM273"
  :code-space [0 255]
  :mime-charset 'ibm273
  :map "IBM273")
1092
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm274' and the mapping named "IBM274".
(define-charset 'ibm274
  "Belgian version of EBCDIC"
  :short-name "IBM274"
  :code-space [0 255]
  :mime-charset 'ibm274
  :map "IBM274")
1099
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm275' and the mapping named "IBM275".
(define-charset 'ibm275
  "Brazilian version of EBCDIC"
  :short-name "IBM275"
  :code-space [0 255]
  :mime-charset 'ibm275
  :map "IBM275")
1106
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm277' and the mapping named "IBM277".
(define-charset 'ibm277
  "Danish / Norwegian version of EBCDIC"
  :short-name "IBM277"
  :code-space [0 255]
  :mime-charset 'ibm277
  :map "IBM277")
1113
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm278' and the mapping named "IBM278".
(define-charset 'ibm278
  "Finnish / Swedish version of EBCDIC"
  :short-name "IBM278"
  :code-space [0 255]
  :mime-charset 'ibm278
  :map "IBM278")
1120
;; One-dimensional charset (code points 0..255) with the mapping named
;; "IBM280".  The MIME charset name is the charset's own name, exactly
;; as for every sibling IBM EBCDIC definition in this file.
(define-charset 'ibm280
  "Italian version of EBCDIC"
  :short-name "IBM280"
  :code-space [0 255]
  :mime-charset 'ibm280
  :map "IBM280")
1127
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm281' and the mapping named "IBM281".
(define-charset 'ibm281
  "Japanese-E version of EBCDIC"
  :short-name "IBM281"
  :code-space [0 255]
  :mime-charset 'ibm281
  :map "IBM281")
1134
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm284' and the mapping named "IBM284".
(define-charset 'ibm284
  "Spanish version of EBCDIC"
  :short-name "IBM284"
  :code-space [0 255]
  :mime-charset 'ibm284
  :map "IBM284")
1141
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm285' and the mapping named "IBM285".
(define-charset 'ibm285
  "UK english version of EBCDIC"
  :short-name "IBM285"
  :code-space [0 255]
  :mime-charset 'ibm285
  :map "IBM285")
1148
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm290' and the mapping named "IBM290".
(define-charset 'ibm290
  "Japanese katakana version of EBCDIC"
  :short-name "IBM290"
  :code-space [0 255]
  :mime-charset 'ibm290
  :map "IBM290")
1155
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm297' and the mapping named "IBM297".
(define-charset 'ibm297
  "French version of EBCDIC"
  :short-name "IBM297"
  :code-space [0 255]
  :mime-charset 'ibm297
  :map "IBM297")
1162
;; One-dimensional charset (code points 0..255) with MIME name
;; `ibm1047' and the mapping named "IBM1047"; also reachable under the
;; alias name `cp1047'.
(define-charset 'ibm1047
  ;; Says groff:
  "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
  :short-name "IBM1047"
  :code-space [0 255]
  :mime-charset 'ibm1047
  :map "IBM1047")
(define-charset-alias 'cp1047 'ibm1047)
1171
;; One-dimensional, ASCII-compatible charset (code points 0..255)
;; whose mapping is named "HP-ROMAN8".
(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")
1178
;; To make a coding system with this, a pre-write-conversion should
;; account for the commented-out multi-valued code points in
;; stdenc.map.
;;
;; One-dimensional charset whose code space starts at #x20 (not 0) and
;; ends at 255; its mapping is named "stdenc".
(define-charset 'adobe-standard-encoding
  "Adobe `standard encoding' used in PostScript"
  :short-name "ADOBE-STANDARD-ENCODING"
  :code-space [#x20 255]
  :map "stdenc")
1187
;; One-dimensional charset whose code space starts at #x20 (not 0) and
;; ends at 255; its mapping is named "symbol".  Note that the charset
;; name is `symbol' while the short name is "ADOBE-SYMBOL".
(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :code-space [#x20 255]
  :map "symbol")
1193
;; One-dimensional, ASCII-compatible charset (code points 0..255)
;; whose mapping is named "IBM850"; also reachable under the alias
;; name `cp850'.
(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "IBM850")
(define-charset-alias 'cp850 'ibm850)
1201
;; One-dimensional, ASCII-compatible charset (code points 0..255)
;; whose mapping is named "MIK".
(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "MIK")
1208
;; One-dimensional, ASCII-compatible charset (code points 0..255)
;; whose mapping is named "PTCP154".  The MIME name is `pt154', which
;; is also defined as a charset alias together with `cp154'.
(define-charset 'ptcp154
  "ParaType codepage (Asian Cyrillic)"
  :short-name "PT154"
  :ascii-compatible-p t
  :code-space [0 255]
  :mime-charset 'pt154
  :map "PTCP154")
(define-charset-alias 'pt154 'ptcp154)
(define-charset-alias 'cp154 'ptcp154)
1218
;; Two-dimensional, supplementary charset whose mapping is named
;; "GB180302".  It is one of the components listed in the :superset of
;; the `gb18030' charset defined later in this file.
;; NOTE(review): the code-space vector presumably lists the trailing
;; byte range (#x40..#xFE) before the leading byte range (#x81..#xFE)
;; -- confirm against the `define-charset' documentation.
(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE #x81 #xFE]
  :supplementary-p t
  :map "GB180302")
1224
;; Four-dimensional, supplementary charset whose mapping is named
;; "GB180304".  It is one of the components listed in the :superset of
;; the `gb18030' charset defined later in this file.
(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
  :supplementary-p t
  :map "GB180304")
1230
;; Four-dimensional, supplementary charset defined by an offset
;; (#x10000) rather than a map.  :min-code and :max-code are written
;; as (HIGH . LOW) pairs of 16-bit halves; together they spell the
;; 0x90308130..0xE3329A35 range quoted in the docstring.
(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35)
  :supplementary-p t
  :code-offset #x10000)
1238
;; Four-dimensional, supplementary charset defined by an offset
;; (#x200000) rather than a map.  :min-code and :max-code are written
;; as (HIGH . LOW) pairs of 16-bit halves; together they spell the
;; 0x8431A530..0x8F39FE39 range quoted in the docstring.
(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000			; ... #x22484B
  )
1247
;; Four-dimensional, supplementary charset defined by an offset
;; rather than a map.  Its offset #x22484C is one past the last code
;; (#x22484B) noted in the trailing comment of `gb18030-4-byte-ext-1'.
;; :min-code and :max-code are (HIGH . LOW) pairs of 16-bit halves.
(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C			; ... #x279f93
  )
1256
;; Umbrella charset: it has neither :map nor :code-offset of its own
;; but is declared as the :superset of `ascii' and the five GB18030
;; component charsets defined just above.
(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii gb18030-2-byte
		    gb18030-4-byte-bmp gb18030-4-byte-smp
		    gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
1265
;; Two-dimensional charset (33..126 in each dimension) placed at code
;; offset #x27A000, with the unify map "CNS-F".  The charset is passed
;; to `unify-charset' right below.
(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name  "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :code-space [33 126 33 126]
  :code-offset #x27A000
  :unify-map "CNS-F")
1273
;; Call `unify-charset' on each of the charsets below, one after the
;; other, in exactly the order listed.
(dolist (charset '(chinese-gb2312
		   chinese-gbk
		   chinese-cns11643-1
		   chinese-cns11643-2
		   chinese-cns11643-3
		   chinese-cns11643-4
		   chinese-cns11643-5
		   chinese-cns11643-6
		   chinese-cns11643-7
		   chinese-cns11643-15
		   big5
		   chinese-big5-1
		   chinese-big5-2
		   big5-hkscs
		   korean-ksc5601
		   vietnamese-viscii-lower
		   vietnamese-viscii-upper
		   chinese-sisheng
		   ipa
		   tibetan
		   ethiopic
		   indian-is13194
		   japanese-jisx0208-1978
		   japanese-jisx0208
		   japanese-jisx0212
		   japanese-jisx0213-1
		   japanese-jisx0213-2))
  (unify-charset charset))
1301
1302
1303;; These are tables for translating characters on decoding and
1304;; encoding.
1305;; Fixme: these aren't used now -- should they be?
;; Both standard translation tables start out empty (nil).
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)
1309
1310;;; Make fundamental coding systems.
1311
;; The coding systems `no-conversion' and `undecided' are already
;; defined in coding.c as below:
1314;;
1315;; (define-coding-system 'no-conversion
1316;;   "..."
1317;;   :coding-type 'raw-text
1318;;   ...)
1319;; (define-coding-system 'undecided
1320;;   "..."
1321;;   :coding-type 'undecided
1322;;   ...)
1323
;; Short convenience names: each entry is (ALIAS . CODING-SYSTEM) and
;; is registered in the order listed.
(dolist (entry '((binary . no-conversion)
		 (unix . undecided-unix)
		 (dos . undecided-dos)
		 (mac . undecided-mac)))
  (define-coding-system-alias (car entry) (cdr entry)))
1328
;; Same coding type as `undecided', but with :prefer-utf-8 turned on.
;; Both :inhibit-null-byte-detection and :inhibit-iso-escape-detection
;; are given the value 0 (not nil or t).
(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :coding-type 'undecided
  :mnemonic ?-
  :charset-list '(emacs)
  :prefer-utf-8 t
  :inhibit-null-byte-detection 0
  :inhibit-iso-escape-detection 0)
1342
;; Coding system of type `raw-text', flagged :for-unibyte; no
;; :eol-type is given here, in contrast to `no-conversion-multibyte'
;; defined right after it.
(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings.  An exception is made for
characters from the `eight-bit' character set.  Each of them is encoded
into a single byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :for-unibyte t
  :mnemonic ?t)
1356
;; Coding system of type `raw-text' with the EOL type fixed to `unix'
;; and, unlike `raw-text' itself, without the :for-unibyte flag.
(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text
  :eol-type 'unix
  :mnemonic ?=)
1362
;; Charset-based coding system over the single charset `iso-8859-1',
;; advertised under the MIME name `iso-8859-1'.  The names
;; `iso-8859-1' and `latin-1' are registered as aliases for it.
(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'charset
  :mnemonic ?1
  :charset-list '(iso-8859-1)
  :mime-charset 'iso-8859-1)

(define-coding-system-alias 'iso-8859-1 'iso-latin-1)
(define-coding-system-alias 'latin-1 'iso-latin-1)
1372
1373;; Coding systems not specific to each language environment.
1374
;; Coding system of type `emacs-mule'.  Note that :charset-list is the
;; bare symbol `emacs-mule' here, not a list of charsets.
(define-coding-system 'emacs-mule
 "Emacs 21 internal format used in buffer and string."
 :coding-type 'emacs-mule
 :charset-list 'emacs-mule
 :mnemonic ?M)
1380
;; UTF-8 over the `unicode' charset with MIME name `utf-8'; no :bom
;; property is given (compare `utf-8-with-signature' and `utf-8-auto').
(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :mime-charset 'utf-8)
1387
;; UTF-8 over the `unicode' charset with :bom set to t.  No
;; :mime-charset is declared for this variant.
(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t)
1394
;; UTF-8 over the `unicode' charset whose :bom is a cons naming the
;; two concrete variants: `utf-8-with-signature' and plain `utf-8'.
(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-8-with-signature . utf-8))
1401
;; `mule-utf-8' is an additional name for `utf-8'.
(define-coding-system-alias 'mule-utf-8 'utf-8)
;; See this page:
;; https://docs.microsoft.com/en-us/windows/desktop/intl/code-page-identifiers
;; Starting with Windows 10, people are trying to set their systems to
;; use UTF-8, so we had better recognize this alias:
(define-coding-system-alias 'cp65001 'utf-8)
1408
;; Coding type `utf-8' over the `emacs' charset (rather than
;; `unicode', which the plain `utf-8' coding system uses).
(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))
1414
;; The encoding used internally.  This encoding is meant to be able to save
;; any multibyte buffer without losing information.  It can change between
;; Emacs releases, though, so it should only be used for internal files.
;; Currently it is an alias for `utf-8-emacs-unix'.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
1419
;; UTF-16 over the `unicode' charset with :endian `little' and no
;; :bom property; flagged :mime-text-unsuitable, MIME name `utf-16le'.
(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16le)
1428
;; UTF-16 over the `unicode' charset with :endian `big' and no :bom
;; property; flagged :mime-text-unsuitable, MIME name `utf-16be'.
(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16be)
1437
;; UTF-16 over the `unicode' charset with :bom t and :endian `little'.
;; Its MIME name is the generic `utf-16', which it shares with
;; `utf-16be-with-signature' and `utf-16'.
(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16)
1447
;; UTF-16 over the `unicode' charset with :bom t and :endian `big'.
;; Its MIME name is the generic `utf-16', which it shares with
;; `utf-16le-with-signature' and `utf-16'.
(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)
1457
;; UTF-16 over the `unicode' charset.  :bom is a cons naming the
;; little-endian and big-endian with-signature variants, and :endian
;; is `big', matching the "big endian on encoding" of the docstring.
(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)
1467
;; Backwards compatibility (old names, also used by Mule-UCS).  We
;; prefer the MIME names.  Note that the hyphenated old names refer to
;; the *with-signature* variants, not to `utf-16le' / `utf-16be'.
(define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
(define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
1472
1473
;; ISO-2022 type coding system.  :charset-list is the bare symbol
;; `iso-2022' and only the first slot of the four-element :designation
;; vector is filled, in line with "using only G0" in the docstring.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022
  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))
1481
;; ISO-2022 type coding system.  The first and third slots of the
;; :designation vector are filled, and the flags include both `7-bit'
;; and `single-shift'.
(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
		 designation single-shift composition))
1490
;; ISO-2022 type coding system.  The first and second slots of the
;; :designation vector are filled, and the flags include both `7-bit'
;; and `locking-shift' (but not `short').  Also available under the
;; alias `iso-2022-int-1'.
(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
			designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)
1501
;; ISO-2022 type coding system with an explicit list of CJK charsets
;; (instead of the bare symbol `iso-2022' used by its siblings) and
;; with all four :designation slots filled.  The flags combine
;; `locking-shift', `single-shift' and `init-bol'; `designation' and
;; `composition' are not among them.  Also available under the alias
;; `iso-2022-cjk'.
(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
		  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
		  korean-ksc5601
		  chinese-gb2312
		  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
		  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
		  chinese-cns11643-7)
  :designation [(ascii 94)
		(nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
		(nil chinese-cns11643-2)
		(nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
		     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
		 single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)
1522
;; Same :designation vector as `iso-2022-7bit-ss2', but the flags omit
;; both `short' and `7-bit'.
(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
1530
;; ISO-2022 type coding system whose flags include `long-form' and
;; `composition'.  It is advertised under the (non-standard) MIME name
;; `x-ctext' and is also reachable through the aliases `x-ctext' and
;; `ctext'.
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
			designation locking-shift single-shift composition)
  ;; Fixme: this isn't a valid MIME charset and has to be
  ;; special-cased elsewhere  -- fx
  :mime-charset 'x-ctext)

(define-coding-system-alias  'x-ctext 'compound-text)
(define-coding-system-alias  'ctext 'compound-text)
1548
;; Same as compound-text, but doesn't produce composition escape
;; sequences.  Used in post-read and pre-write conversions of
;; compound-text-with-extensions, see mule.el.  Note that this should
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
;; Compared with `compound-text', the flags below drop `composition'
;; and also `long-form'.
(define-coding-system 'ctext-no-compositions
 "Compound text based generic encoding.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
			designation locking-shift single-shift))
1564
;; Same designation as `compound-text', with `long-form' but without
;; `composition' in the flags, and with `ctext-post-read-conversion' /
;; `ctext-pre-write-conversion' hooked in.  It shares the MIME name
;; `x-ctext' with `compound-text'.  Aliases:
;; `x-ctext-with-extensions' and `ctext-with-extensions'.
(define-coding-system 'compound-text-with-extensions
 "Compound text encoding with ICCCM Extended Segment extensions.

See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.

This coding system should be used only for X selections.  It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
			designation locking-shift single-shift)
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion
  :mime-charset 'x-ctext)

(define-coding-system-alias
  'x-ctext-with-extensions 'compound-text-with-extensions)
(define-coding-system-alias
  'ctext-with-extensions 'compound-text-with-extensions)
1588
;; Charset-based coding system over `ascii' only; :default-char is the
;; question mark, which is the replacement the docstring refers to.
;; Aliases: `iso-safe' and `ascii'.  This coding system is also
;; installed as the safe terminal coding system further down.
(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type 'charset
  :mnemonic ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

(define-coding-system-alias 'iso-safe 'us-ascii)
(define-coding-system-alias 'ascii 'us-ascii)
1599
;; UTF-7 is declared with coding type `utf-8'; the UTF-7 specific work
;; is delegated to `utf-7-pre-write-conversion' and
;; `utf-7-post-read-conversion'.
(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-pre-write-conversion
  :post-read-conversion 'utf-7-post-read-conversion)
;; FIXME: 'define-coding-system' automatically sets :ascii-compatible-p,
;; to any encoding whose :coding-type is 'utf-8', but UTF-7 is not ASCII
;; compatible, so we override that here (bug#40407).
(coding-system-put 'utf-7 :ascii-compatible-p nil)
1612
;; IMAP flavour of UTF-7: declared with coding type `utf-8', with the
;; real work delegated to `utf-7-imap-pre-write-conversion' and
;; `utf-7-imap-post-read-conversion'.  No :mime-charset is declared.
(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8
  :mnemonic ?u
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :post-read-conversion 'utf-7-imap-post-read-conversion)
;; See comment for utf-7 above: :ascii-compatible-p is reset to nil
;; after the definition for the same reason.
(coding-system-put 'utf-7-imap :ascii-compatible-p nil)
1622
;; Use us-ascii for terminal output if some other coding system is not
;; specified explicitly.  `us-ascii' is the coding system defined
;; earlier in this file.
(set-safe-terminal-coding-system-internal 'us-ascii)
1626
1627;; The other coding-systems are defined in each language specific
1628;; files under lisp/language.
1629
1630;; Normally, set coding system to `undecided' before reading a file.
1631;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
1632;; but we regard them as containing multibyte characters.
1633;; Tar files are not decoded at all, but we treat them as raw bytes.
1634
;; Install the default file-name -> coding-system rules.  Every rule
;; is rebuilt as a fresh cons whose regexp has gone through
;; `purecopy'; the value part of each rule is reused unchanged.
(let ((rules
       '(("\\.elc\\'" . utf-8-emacs)
	 ("\\.el\\'" . prefer-utf-8)
	 ("\\.utf\\(-8\\)?\\'" . utf-8)
	 ("\\.xml\\'" . xml-find-file-coding-system)
	 ;; We use raw-text for reading loaddefs.el so that if it
	 ;; happens to have DOS or Mac EOLs, they are converted to
	 ;; newlines.  This is required to make the special treatment
	 ;; of the "\ newline" combination in loaddefs.el, which marks
	 ;; the beginning of a doc string, work.
	 ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix))
	 ("\\.tar\\'" . (no-conversion . no-conversion))
	 ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
	 ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
	 ("" . (undecided . nil)))))
  (setq file-coding-system-alist
	(mapcar (lambda (rule)
		  (let ((pattern (car rule))
			(coding (cdr rule)))
		    (cons (purecopy pattern) coding)))
		rules)))
1651
1652
1653;;; Setting coding categories and their priorities.
1654
;; This setting is just for reading Emacs Lisp source files that
;; contain multilingual text while dumping Emacs.  More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.
1659
;; Initial priority order: Latin-1 first, then UTF-8, then 7-bit
;; ISO-2022.
(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
1665
1666
1667;;; Miscellaneous settings.
1668
;; Bind every character from 128 up to `max-char' to
;; `self-insert-command' in the char-table that is the second element
;; of `global-map'.
(let ((char-table (nth 1 global-map))
      (char-range (cons 128 (max-char))))
  (set-char-table-range char-table char-range 'self-insert-command))
1673
;; Flag the six codes ?\221 .. ?\226 in `latin-extra-code-table'.
(dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
  (aset latin-extra-code-table code t))
1680
;; User option (customization group `processes', introduced or last
;; changed in version 27.1 according to :version) holding words and
;; phrases that mean "password" in many languages.  The trailing
;; comment on each entry names the language code(s) it comes from.
(defcustom password-word-equivalents
  '("password" "passcode" "passphrase" "pass phrase" "pin"
    "decryption key" "encryption key" ; From ccrypt.
    ; These are sorted according to the GNU en_US locale.
    "암호"		; ko
    "パスワード"	; ja
    "ପ୍ରବେଶ ସଙ୍କେତ"	; or
    "ពាក្យសម្ងាត់"		; km
    "adgangskode"	; da
    "contraseña"	; es
    "contrasenya"	; ca
    "geslo"		; sl
    "hasło"		; pl
    "heslo"		; cs, sk
    "iphasiwedi"	; zu
    "jelszó"		; hu
    "lösenord"		; sv
    "lozinka"		; hr, sr
    "mật khẩu"		; vi
    "mot de passe"	; fr
    "parola"		; tr
    "pasahitza"		; eu
    "passord"		; nb
    "passwort"		; de
    "pasvorto"		; eo
    "salasana"		; fi
    "senha"		; pt
    "slaptažodis"	; lt
    "wachtwoord"	; nl
    "كلمة السر"		; ar
    "ססמה"		; he
    "лозинка"		; sr
    "пароль"		; kk, ru, uk
    "गुप्तशब्द"		; mr
    "शब्दकूट"		; hi
    "પાસવર્ડ"		; gu
    "సంకేతపదము"		; te
    "ਪਾਸਵਰਡ"		; pa
    "ಗುಪ್ತಪದ"		; kn
    "கடவுச்சொல்"		; ta
    "അടയാളവാക്ക്"		; ml
    "গুপ্তশব্দ"		; as
    "পাসওয়ার্ড"		; bn_IN
    "රහස්පදය"		; si
    "密码"		; zh_CN
    "密碼"		; zh_TW
    )
  "List of words equivalent to \"password\".
This is used by Shell mode and other parts of Emacs to recognize
password prompts, including prompts in languages other than
English.  Different case choices should not be assumed to be
included; callers should bind `case-fold-search' to t."
  :type '(repeat string)
  :version "27.1"
  :group 'processes)
1736
;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file but might be required by user
;; code.  Providing the feature here lets (require 'code-pages)
;; succeed once this file has been loaded.
(provide 'code-pages)
1741
1742;;; mule-conf.el ends here
1743