1;;; mule-conf.el --- configure multilingual environment -*- lexical-binding: t; -*- 2 3;; Copyright (C) 1997-2021 Free Software Foundation, Inc. 4;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 5;; National Institute of Advanced Industrial Science and Technology (AIST) 6;; Registration Number H14PRO021 7;; Copyright (C) 2003 8;; National Institute of Advanced Industrial Science and Technology (AIST) 9;; Registration Number H13PRO009 10 11;; Keywords: i18n, mule, multilingual, character set, coding system 12 13;; This file is part of GNU Emacs. 14 15;; GNU Emacs is free software: you can redistribute it and/or modify 16;; it under the terms of the GNU General Public License as published by 17;; the Free Software Foundation, either version 3 of the License, or 18;; (at your option) any later version. 19 20;; GNU Emacs is distributed in the hope that it will be useful, 21;; but WITHOUT ANY WARRANTY; without even the implied warranty of 22;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23;; GNU General Public License for more details. 24 25;; You should have received a copy of the GNU General Public License 26;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 27 28;;; Commentary: 29 30;; This file defines the Emacs charsets and some basic coding systems. 31;; Other coding systems are defined in the files in directory 32;; lisp/language. 33 34;;; Code: 35 36;;; Remarks 37 38;; The ISO-IR registry is maintained by the Information Processing 39;; Society of Japan/Information Technology Standards Commission of 40;; Japan (IPSJ/ITSCJ) at https://www.itscj.ipsj.or.jp/itscj_english/. 41;; Standards docs equivalent to iso-2022 and iso-8859 are at 42;; https://www.ecma.ch/. 
;; FWIW, https://www.microsoft.com/globaldev/ lists the following for
;; MS Windows, which are presumably the only charsets we really need
;; to worry about on such systems:
;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
;;   1258, 874, 932, 936, 949, 950

;;; Definitions of character sets.

;; The charsets `ascii', `unicode', `emacs' and `eight-bit' are already
;; defined in charset.c as below:
;;
;; (define-charset 'ascii
;;   ""
;;   :dimension 1
;;   :code-space [0 127]
;;   :iso-final-char ?B
;;   :ascii-compatible-p t
;;   :emacs-mule-id 0
;;   :code-offset 0)
;;
;; (define-charset 'unicode
;;   ""
;;   :dimension 3
;;   :code-space [0 255 0 255 0 16]
;;   :ascii-compatible-p t
;;   :code-offset 0)
;;
;; (define-charset 'emacs
;;   ""
;;   :dimension 3
;;   :code-space [0 255 0 255 0 63]
;;   :ascii-compatible-p t
;;   :supplementary-p t
;;   :code-offset 0)
;;
;; (define-charset 'eight-bit
;;   ""
;;   :dimension 1
;;   :code-space [128 255]
;;   :code-offset #x3FFF80)
;;
;; We now set :docstring, :short-name, and :long-name properties.
;; Attach descriptive properties to charsets that already exist when
;; this file is loaded.  Each entry is (CHARSET PROPERTY VALUE ...);
;; the properties are stored one at a time, in the order written.
(dolist (entry '((ascii
                  :docstring "ASCII (ISO646 IRV)"
                  :short-name "ASCII"
                  :long-name "ASCII (ISO646 IRV)")
                 (iso-8859-1
                  :docstring "Latin-1 (ISO/IEC 8859-1)"
                  :short-name "Latin-1"
                  :long-name "Latin-1")
                 (unicode
                  :docstring "Unicode (ISO10646)"
                  :short-name "Unicode"
                  :long-name "Unicode (ISO10646)")
                 (emacs
                  :docstring "Full Emacs charset (excluding eight bit chars)"
                  :short-name "Emacs"
                  :long-name "Emacs")
                 ;; No :long-name is set explicitly for `eight-bit'.
                 (eight-bit
                  :docstring "Raw bytes 128-255"
                  :short-name "Raw bytes")))
  (let ((charset (car entry))
        (props (cdr entry)))
    (while props
      (put-charset-property charset (car props) (cadr props))
      (setq props (cddr props)))))

;; `ucs' is another name for `unicode'.
(define-charset-alias 'ucs 'unicode)

(define-charset 'latin-iso8859-1
  "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :short-name "RHP of Latin-1"
  :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  :iso-final-char ?A
  :emacs-mule-id 129
  :code-space [32 127]
  :code-offset 160)

;; Name perhaps not ideal, but is XEmacs-compatible.
;;; Control codes and raw bytes.

(define-charset 'control-1
  "8-bit control code (0x80..0x9F)"
  :short-name "8-bit control code"
  :code-space [128 159]
  :code-offset 128)

(define-charset 'eight-bit-control
  "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  :short-name "Raw bytes 0x80..0x9F"
  :supplementary-p t
  :code-space [128 159]
  :code-offset #x3FFF80)                ; see character.h

(define-charset 'eight-bit-graphic
  "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  :short-name "Raw bytes 0xA0..0xFF"
  :supplementary-p t
  :code-space [160 255]
  :code-offset #x3FFFA0)                ; see character.h

;;; ISO/IEC 8859 family.

(defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
                                                 iso-ir iso-final
                                                 emacs-mule-id map)
  "Define the single-byte charset SYMBOL and, optionally, its right-hand part.
SYMBOL is defined with code space [0 255], marked ASCII compatible,
with docstring and long name NAME, short name NICKNAME, and map MAP.
If ISO-SYMBOL is non-nil, it is defined as well, with code space
[32 127], as the subset (SYMBOL 160 255 -128), using ISO-FINAL as
its ISO final char and EMACS-MULE-ID as its emacs-mule id.  A
non-nil ISO-IR is appended to the docstring of ISO-SYMBOL as
\"ISO-IR-<number>\"."
  (declare (indent defun))
  `(progn
     (define-charset ,symbol
       ,name
       :short-name ,nickname
       :long-name ,name
       :ascii-compatible-p t
       :code-space [0 255]
       :map ,map)
     (when ,iso-symbol
       (define-charset ,iso-symbol
         (if ,iso-ir
             (format "Right-Hand Part of %s (%s): ISO-IR-%d"
                     ,name ,nickname ,iso-ir)
           (format "Right-Hand Part of %s (%s)" ,name ,nickname))
         :short-name (format "RHP of %s" ,name)
         :long-name (format "RHP of %s (%s)" ,name ,nickname)
         :iso-final-char ,iso-final
         :emacs-mule-id ,emacs-mule-id
         :code-space [32 127]
         :subset (list ,symbol 160 255 -128)))))

(define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
  "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")

(define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
  "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")

(define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
  "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")

(define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
  "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")

(define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
  "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")

(define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
  "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")

(define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
  "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")

(define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
  "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")

(define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
  "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")

;; http://www.nectec.or.th/it-standards/iso8859-11/
;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
;; plus nbsp
(define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
  "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")

;; 8859-12 doesn't (yet?) exist.

(define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
  "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")

(define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
  "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")

(define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
  "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")

(define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
  "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")

;; No point in keeping it around.
(fmakunbound 'define-iso-single-byte-charset)

;;; Thai.

;; Can this be shared with 8859-11?
;; N.b. not all of these are defined in Unicode.
(define-charset 'thai-tis620
  "MULE charset for TIS620.2533"
  :short-name "TIS620.2533"
  :iso-final-char ?T
  :emacs-mule-id 133
  :code-space [32 127]
  :code-offset #x0E00)

(define-charset 'tis620-2533
  "TIS620.2533, a.k.a. TIS-620. Like `thai-iso8859-11', but without NBSP."
  :short-name "TIS620.2533"
  :ascii-compatible-p t
  :code-space [0 255]
  :superset '(ascii (thai-tis620 . 128)))

;;; JIS X 0201.

(define-charset 'jisx0201
  "JISX0201"
  :short-name "JISX0201"
  :code-space [0 #xDF]
  :map "JISX0201")

(define-charset 'latin-jisx0201
  "Roman Part of JISX0201.1976"
  :short-name "JISX0201 Roman"
  :long-name "Japanese Roman (JISX0201.1976)"
  :iso-final-char ?J
  :emacs-mule-id 138
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 33 126 0))

(define-charset 'katakana-jisx0201
  "Katakana Part of JISX0201.1976"
  :short-name "JISX0201 Katakana"
  :long-name "Japanese Katakana (JISX0201.1976)"
  :iso-final-char ?I
  :emacs-mule-id 137
  :supplementary-p t
  :code-space [33 126]
  :subset '(jisx0201 161 254 -128))

;;; Chinese.

(define-charset 'chinese-gb2312
  "GB2312 Chinese simplified: ISO-IR-58"
  :short-name "GB2312"
  :long-name "GB2312: ISO-IR-58"
  :iso-final-char ?A
  :emacs-mule-id 145
  :code-space [33 126 33 126]
  :code-offset #x110000
  :unify-map "GB2312")

(define-charset 'chinese-gbk
  "GBK Chinese simplified."
  :short-name "GBK"
  :code-space [#x40 #xFE #x81 #xFE]
  :code-offset #x160000
  :unify-map "GBK")
(define-charset-alias 'cp936 'chinese-gbk)
(define-charset-alias 'windows-936 'chinese-gbk)

(define-charset 'chinese-cns11643-1
  "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  :short-name "CNS11643-1"
  :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  :iso-final-char ?G
  :emacs-mule-id 149
  :code-space [33 126 33 126]
  :code-offset #x114000
  :unify-map "CNS-1")

(define-charset 'chinese-cns11643-2
  "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  :short-name "CNS11643-2"
  :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  :iso-final-char ?H
  :emacs-mule-id 150
  :code-space [33 126 33 126]
  :code-offset #x118000
  :unify-map "CNS-2")

(define-charset 'chinese-cns11643-3
  "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  :short-name "CNS11643-3"
  :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  :iso-final-char ?I
  :code-space [33 126 33 126]
  :emacs-mule-id 246
  :code-offset #x11C000
  :unify-map "CNS-3")

(define-charset 'chinese-cns11643-4
  "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
  :short-name "CNS11643-4"
  :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
  :iso-final-char ?J
  :emacs-mule-id 247
  :code-space [33 126 33 126]
  :code-offset #x120000
  :unify-map "CNS-4")

(define-charset 'chinese-cns11643-5
  "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
  :short-name "CNS11643-5"
  :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
  :iso-final-char ?K
  :emacs-mule-id 248
  :code-space [33 126 33 126]
  :code-offset #x124000
  :unify-map "CNS-5")

(define-charset 'chinese-cns11643-6
  "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
  :short-name "CNS11643-6"
  :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
  :iso-final-char ?L
  :emacs-mule-id 249
  :code-space [33 126 33 126]
  :code-offset #x128000
  :unify-map "CNS-6")

(define-charset 'chinese-cns11643-7
  "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
  :short-name "CNS11643-7"
  :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
  :iso-final-char ?M
  :emacs-mule-id 250
  :code-space [33 126 33 126]
  :code-offset #x12C000
  :unify-map "CNS-7")

(define-charset 'big5
  "Big5 (Chinese traditional)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x130000
  :unify-map "BIG5")
;; Fixme: AKA cp950 according to
;; <URL:https://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
;; that correct?

(define-charset 'chinese-big5-1
  "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-1)"
  :long-name "Big5 (Level-1) A141-C67F"
  :iso-final-char ?0
  :emacs-mule-id 152
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x135000
  :unify-map "BIG5-1")

(define-charset 'chinese-big5-2
  "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  :short-name "Big5 (Level-2)"
  :long-name "Big5 (Level-2) C940-FEFE"
  :iso-final-char ?1
  :emacs-mule-id 153
  :supplementary-p t
  :code-space [#x21 #x7E #x21 #x7E]
  :code-offset #x137800
  :unify-map "BIG5-2")

;;; Japanese.

(define-charset 'japanese-jisx0208
  "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  :short-name "JISX0208"
  :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  :iso-final-char ?B
  :emacs-mule-id 146
  :code-space [33 126 33 126]
  :code-offset #x140000
  :unify-map "JISX0208")

(define-charset 'japanese-jisx0208-1978
  "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  :short-name "JISX0208.1978"
  :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  :iso-final-char ?@
  :emacs-mule-id 144
  :code-space [33 126 33 126]
  :code-offset #x144000
  :unify-map "JISC6226")

(define-charset 'japanese-jisx0212
  "JISX0212 Japanese supplement: ISO-IR-159"
  :short-name "JISX0212"
  :long-name "JISX0212 (Japanese): ISO-IR-159"
  :iso-final-char ?D
  :emacs-mule-id 148
  :code-space [33 126 33 126]
  :code-offset #x148000
  :unify-map "JISX0212")

;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
;; arguable whether it should have a unify-map.
(define-charset 'japanese-jisx0213-1
  "JISX0213.2000 Plane 1 (Japanese)"
  :short-name "JISX0213-1"
  :iso-final-char ?O
  :emacs-mule-id 151
  :unify-map "JISX2131"
  :code-space [33 126 33 126]
  :code-offset #x14C000)

(define-charset 'japanese-jisx0213-2
  "JISX0213.2000 Plane 2 (Japanese)"
  :short-name "JISX0213-2"
  :iso-final-char ?P
  :emacs-mule-id 254
  :unify-map "JISX2132"
  :code-space [33 126 33 126]
  :code-offset #x150000)

(define-charset 'japanese-jisx0213-a
  "JISX0213.2004 adds these characters to JISX0213.2000."
  :short-name "JISX0213A"
  :dimension 2
  :code-space [33 126 33 126]
  :supplementary-p t
  :map "JISX213A")

(define-charset 'japanese-jisx0213.2004-1
  "JISX0213.2004 Plane1 (Japanese)"
  :short-name "JISX0213.2004-1"
  :dimension 2
  :code-space [33 126 33 126]
  :iso-final-char ?Q
  :superset '(japanese-jisx0213-a japanese-jisx0213-1))

(define-charset 'katakana-sjis
  "Katakana part of Shift-JIS"
  :dimension 1
  :code-space [#xA1 #xDF]
  :subset '(jisx0201 #xA1 #xDF 0)
  :supplementary-p t)

(define-charset 'cp932-2-byte
  "2-byte part of CP932"
  :dimension 2
  :map "CP932-2BYTE"
  :code-space [#x40 #xFC #x81 #xFC]
  :supplementary-p t)

(define-charset 'cp932
  "CP932 (Microsoft shift-jis)"
  :code-space [#x00 #xFF #x00 #xFE]
  :short-name "CP932"
  :superset '(ascii katakana-sjis cp932-2-byte))

;;; Korean and the Hong Kong supplement of Big5.

(define-charset 'korean-ksc5601
  "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  :short-name "KSC5601"
  :long-name "KSC5601 (Korean): ISO-IR-149"
  :iso-final-char ?C
  :emacs-mule-id 147
  :code-space [33 126 33 126]
  :code-offset #x279f94                 ; ... #x27c217
  :unify-map "KSC5601")

(define-charset 'big5-hkscs
  "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  :short-name "Big5"
  :code-space [#x40 #xFE #xA1 #xFE]
  :code-offset #x27c218                 ; ... #x280839
  :unify-map "BIG5-HKSCS")

(define-charset 'cp949-2-byte
  "2-byte part of CP949"
  :dimension 2
  :map "CP949-2BYTE"
  :code-space [#x41 #xFE #x81 #xFD]
  :supplementary-p t)

(define-charset 'cp949
  "CP949 (Korean)"
  :short-name "CP949"
  :long-name "CP949 (Korean)"
  :code-space [#x00 #xFE #x00 #xFD]
  :superset '(ascii cp949-2-byte))

;;; Phonetic and Vietnamese charsets.

(define-charset 'chinese-sisheng
  "SiSheng characters for PinYin/ZhuYin"
  :short-name "SiSheng"
  :long-name "SiSheng (PinYin/ZhuYin)"
  :iso-final-char ?0
  :emacs-mule-id 160
  :code-space [33 126]
  :unify-map "MULE-sisheng"
  :supplementary-p t
  :code-offset #x200000)

;; A subset of the 1989 version of IPA. It consists of the consonant
;; signs used in English, French, German and Italian, and all vowels
;; signs in the table. [says old MULE doc]
(define-charset 'ipa
  "IPA (International Phonetic Association)"
  :short-name "IPA"
  :iso-final-char ?0
  :emacs-mule-id 161
  :unify-map "MULE-ipa"
  :code-space [32 127]
  :supplementary-p t
  :code-offset #x200080)

(define-charset 'viscii
  "VISCII1.1"
  :short-name "VISCII"
  :long-name "VISCII 1.1"
  :code-space [0 255]
  :map "VISCII")

(define-charset 'vietnamese-viscii-lower
  "VISCII1.1 lower-case"
  :short-name "VISCII lower"
  :long-name "VISCII lower-case"
  :iso-final-char ?1
  :emacs-mule-id 162
  :code-space [32 127]
  :code-offset #x200200
  :supplementary-p t
  :unify-map "MULE-lviscii")

(define-charset 'vietnamese-viscii-upper
  "VISCII1.1 upper-case"
  :short-name "VISCII upper"
  :long-name "VISCII upper-case"
  :iso-final-char ?2
  :emacs-mule-id 163
  :code-space [32 127]
  :code-offset #x200280
  :supplementary-p t
  :unify-map "MULE-uviscii")

(define-charset 'vscii
  "VSCII1.1 (TCVN-5712 VN1)"
  :short-name "VSCII"
  :code-space [0 255]
  :map "VSCII")
(define-charset-alias 'tcvn-5712 'vscii)

;; Fixme: see note in tcvn.map about combining characters
(define-charset 'vscii-2
  "VSCII-2 (TCVN-5712 VN2)"
  :code-space [0 255]
  :map "VSCII-2")

;; Single-byte, ASCII-compatible charsets read from a map file.
;; Each entry is (NAME DOCSTRING SHORT-NAME MAP ALIAS...); the charset
;; is defined first and then each ALIAS is made to refer to it.
;; Only `dolist' and list accessors are used so that nothing beyond
;; what this file already relies on is needed at load time.
(dolist (spec '((koi8-r "KOI8-R" "KOI8-R" "KOI8-R" koi8)
                (alternativnyj "ALTERNATIVNYJ" "alternativnyj" "ALTERNATIVNYJ")
                (cp866 "CP866" "cp866" "IBM866" ibm866)
                (koi8-u "KOI8-U" "KOI8-U" "KOI8-U")
                (koi8-t "KOI8-T" "KOI8-T" "KOI8-T")
                (georgian-ps "GEORGIAN-PS" "GEORGIAN-PS" "KA-PS")
                (georgian-academy
                 "GEORGIAN-ACADEMY" "GEORGIAN-ACADEMY" "KA-ACADEMY")
                (windows-1250
                 "WINDOWS-1250 (Central Europe)" "WINDOWS-1250" "CP1250"
                 cp1250)
                (windows-1251
                 "WINDOWS-1251 (Cyrillic)" "WINDOWS-1251" "CP1251"
                 cp1251)
                (windows-1252
                 "WINDOWS-1252 (Latin I)" "WINDOWS-1252" "CP1252"
                 cp1252)))
  (let ((charset (nth 0 spec)))
    (define-charset charset
      (nth 1 spec)
      :short-name (nth 2 spec)
      :ascii-compatible-p t
      ;; A fresh vector per charset, as with separate literal forms.
      :code-space (vector 0 255)
      :map (nth 3 spec))
    (dolist (alias (nthcdr 4 spec))
      (define-charset-alias alias charset))))
;; More single-byte, ASCII-compatible charsets read from a map file.
;; Each entry is (NAME DOCSTRING SHORT-NAME MAP ALIAS...); the charset
;; is defined first and then each ALIAS is made to refer to it.
(dolist (spec '((windows-1253
                 "WINDOWS-1253 (Greek)" "WINDOWS-1253" "CP1253" cp1253)
                (windows-1254
                 "WINDOWS-1254 (Turkish)" "WINDOWS-1254" "CP1254" cp1254)
                (windows-1255
                 "WINDOWS-1255 (Hebrew)" "WINDOWS-1255" "CP1255" cp1255)
                (windows-1256
                 "WINDOWS-1256 (Arabic)" "WINDOWS-1256" "CP1256" cp1256)
                (windows-1257
                 "WINDOWS-1257 (Baltic)" "WINDOWS-1257" "CP1257" cp1257)
                (windows-1258
                 "WINDOWS-1258 (Viet Nam)" "WINDOWS-1258" "CP1258" cp1258)
                (next "NEXT" "NEXT" "NEXTSTEP")))
  (let ((charset (nth 0 spec)))
    (define-charset charset
      (nth 1 spec)
      :short-name (nth 2 spec)
      :ascii-compatible-p t
      ;; A fresh vector per charset, as with separate literal forms.
      :code-space (vector 0 255)
      :map (nth 3 spec))
    (dolist (alias (nthcdr 4 spec))
      (define-charset-alias alias charset))))

(define-charset 'cp1125
  "CP1125"
  :short-name "CP1125"
  :code-space [0 255]
  :ascii-compatible-p t
  :map "CP1125")
(define-charset-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-charset-alias 'cp866u 'cp1125)

;; Fixme: C.f. iconv, https://czyborra.com/charsets/codepages.html
;; shows this as not ASCII compatible, with various graphics in
;; 0x01-0x1F.
;; DOS/IBM codepages: single-byte, marked ASCII compatible, read from
;; a map file.  Each entry is (NAME DOCSTRING SHORT-NAME MAP ALIAS...);
;; the charset is defined first and then each ALIAS refers to it.
(dolist (spec '((cp437
                 "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
                 "CP437" "IBM437")
                (cp720 "CP720 (Arabic)" "CP720" "CP720")
                (cp737 "CP737 (PC Greek)" "CP737" "CP737")
                (cp775 "CP775 (PC Baltic)" "CP775" "CP775")
                (cp851 "CP851 (Greek)" "CP851" "IBM851")
                (cp852 "CP852 (MS-DOS Latin-2)" "CP852" "IBM852")
                (cp855 "CP855 (IBM Cyrillic)" "CP855" "IBM855")
                (cp857 "CP857 (IBM Turkish)" "CP857" "IBM857")
                ;; IANA has IBM00858/CP00858
                (cp858 "CP858 (Multilingual Latin I + Euro)" "CP858" "CP858"
                       cp00858)
                (cp860 "CP860 (MS-DOS Portuguese)" "CP860" "IBM860")
                (cp861 "CP861 (MS-DOS Icelandic)" "CP861" "IBM861")
                (cp862 "CP862 (PC Hebrew)" "CP862" "IBM862")
                (cp863 "CP863 (MS-DOS Canadian French)" "CP863" "IBM863")
                (cp864 "CP864 (PC Arabic)" "CP864" "IBM864")
                (cp865 "CP865 (MS-DOS Nordic)" "CP865" "IBM865")
                (cp869 "CP869 (IBM Modern Greek)" "CP869" "IBM869")
                (cp874 "CP874 (IBM Thai)" "CP874" "IBM874")))
  (let ((charset (nth 0 spec)))
    (define-charset charset
      (nth 1 spec)
      :short-name (nth 2 spec)
      ;; A fresh vector per charset, as with separate literal forms.
      :code-space (vector 0 255)
      :ascii-compatible-p t
      :map (nth 3 spec))
    (dolist (alias (nthcdr 4 spec))
      (define-charset-alias alias charset))))

;; For Arabic, we need three different types of character sets.
;; Digits are of direction left-to-right and of width 1-column.
;; Others are of direction right-to-left and of width 1-column or
;; 2-column.
(define-charset 'arabic-digit
  "Arabic digit"
  :short-name "Arabic digit"
  :iso-final-char ?2
  :emacs-mule-id 164
  :supplementary-p t
  :code-space [34 42]
  :code-offset #x0600)

(define-charset 'arabic-1-column
  "Arabic 1-column"
  :short-name "Arabic 1-col"
  :long-name "Arabic 1-column"
  :iso-final-char ?3
  :emacs-mule-id 165
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x200100)

(define-charset 'arabic-2-column
  "Arabic 2-column"
  :short-name "Arabic 2-col"
  :long-name "Arabic 2-column"
  :iso-final-char ?4
  :emacs-mule-id 224
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x200180)

;; Lao script.
;; Codes 0x21..0x7E are mapped to Unicode U+0E81..U+0EDF.
;; Not all of them are defined in Unicode.
(define-charset 'lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :iso-final-char ?1
  :emacs-mule-id 167
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x0E81)

(define-charset 'mule-lao
  "Lao characters (ISO10646 0E81..0EDF)"
  :short-name "Lao"
  :code-space [0 255]
  :supplementary-p t
  :superset '(ascii eight-bit-control (lao . 128)))


;; Indian scripts. Symbolic charset for data exchange. Glyphs are
;; not assigned. They are automatically converted to each Indian
;; script which IS-13194 supports.

(define-charset 'indian-is13194
  "7-bit representation of IS 13194 (ISCII) for Devanagari"
  :short-name "IS 13194 (DEV)"
  :long-name "Indian IS 13194 (DEV)"
  :iso-final-char ?5
  :emacs-mule-id 225
  :supplementary-p t
  :code-space [33 126]
  :code-offset #x180000
  :unify-map "MULE-is13194")

;; Per-script glyph charsets for the CDAC and AKRUTI fonts.  Each
;; charset gets its own 256-code window; the windows are handed out
;; consecutively starting at #x180100, first to every CDAC script and
;; then to every AKRUTI script, in the order listed.  Each entry of
;; the outer list is (NAME-SUFFIX FONT-LABEL SCRIPT...).
(let ((code-offset #x180100))
  (dolist (font '(("cdac" "CDAC"
                   devanagari sanskrit bengali tamil telugu assamese
                   oriya kannada malayalam gujarati punjabi)
                  ("akruti" "AKRUTI"
                   devanagari bengali punjabi gujarati
                   oriya tamil telugu kannada malayalam)))
    (let ((suffix (nth 0 font))
          (font-label (nth 1 font)))
      (dolist (script (nthcdr 2 font))
        (let ((script-label (capitalize (symbol-name script))))
          (define-charset (intern (format "%s-%s" script suffix))
            (format
             "Glyphs of %s script for %s font. Subset of `indian-glyph'."
             script-label font-label)
            :short-name (format "%s %s glyphs" font-label script-label)
            :supplementary-p t
            :code-space [0 255]
            :code-offset code-offset))
        (setq code-offset (+ code-offset #x100))))))

(define-charset 'indian-glyph
  "Glyphs for Indian characters."
  :short-name "Indian glyph"
  :iso-final-char ?4
  :emacs-mule-id 240
  :supplementary-p t
  :code-space [32 127 32 127]
  :code-offset #x180100)

;; Actual Glyph for 1-column width.
;; NOTE(review): this charset and `indian-2-column' below are given
;; the same :emacs-mule-id and :code-offset; left unchanged here --
;; confirm that the sharing is intended.
(define-charset 'indian-1-column
  "Indian charset for 1-column width glyphs."
  :short-name "Indian 1-col"
  :long-name "Indian 1 Column"
  :iso-final-char ?6
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)

;; Actual Glyph for 2-column width.
(define-charset 'indian-2-column
  "Indian charset for 2-column width glyphs."
  :short-name "Indian 2-col"
  :long-name "Indian 2 Column"
  :iso-final-char ?5
  :emacs-mule-id 251
  :supplementary-p t
  :code-space [33 126 33 126]
  :code-offset #x184000)

;; :iso-final-char used to be passed twice (with the same value ?7);
;; it is now given once.
;; NOTE(review): `tibetan' and `tibetan-1-column' use the same
;; :code-offset; left unchanged here -- confirm that this is intended.
(define-charset 'tibetan
  "Tibetan characters"
  :iso-final-char ?7
  :short-name "Tibetan 2-col"
  :long-name "Tibetan 2 column"
  :emacs-mule-id 252
  :unify-map "MULE-tibetan"
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)

(define-charset 'tibetan-1-column
  "Tibetan 1 column glyph"
  :short-name "Tibetan 1-col"
  :long-name "Tibetan 1 column"
  :iso-final-char ?8
  :emacs-mule-id 241
  :supplementary-p t
  :code-space [33 126 33 37]
  :code-offset #x190000)

;; Subsets of Unicode.
(define-charset 'mule-unicode-2500-33ff
  "Unicode characters of the range U+2500..U+33FF."
  :short-name "Unicode subset 2"
  :long-name "Unicode subset (U+2500..U+33FF)"
  :iso-final-char ?2
  :emacs-mule-id 242
  :supplementary-p t
  :code-space [#x20 #x7f #x20 #x47]
  :code-offset #x2500)

;; The long name is written "U+E000..U+FFFF", in the same form as the
;; docstring and as the long names of the two sibling subsets.
(define-charset 'mule-unicode-e000-ffff
  "Unicode characters of the range U+E000..U+FFFF."
  :short-name "Unicode subset 3"
  :long-name "Unicode subset (U+E000..U+FFFF)"
  :iso-final-char ?3
  :emacs-mule-id 243
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x75]
  :code-offset #xE000
  :max-code 30015)                      ; U+FFFF

(define-charset 'mule-unicode-0100-24ff
  "Unicode characters of the range U+0100..U+24FF."
  :short-name "Unicode subset"
  :long-name "Unicode subset (U+0100..U+24FF)"
  :iso-final-char ?1
  :emacs-mule-id 244
  :supplementary-p t
  :code-space [#x20 #x7F #x20 #x7F]
  :code-offset #x100)

;; Unicode planes, each a 2-dimensional charset whose code offset is
;; the first code point of the plane.
(define-charset 'unicode-bmp
  "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
  :short-name "Unicode BMP"
  :code-space [0 255 0 255]
  :code-offset 0)

;; The short name carries no trailing space, like its siblings.
(define-charset 'unicode-smp
  "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
  :short-name "Unicode SMP"
  :code-space [0 255 0 255]
  :code-offset #x10000)

(define-charset 'unicode-sip
  "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
  :short-name "Unicode SIP"
  :code-space [0 255 0 255]
  :code-offset #x20000)

(define-charset 'unicode-ssp
  "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
  :short-name "Unicode SSP"
  :code-space [0 255 0 255]
  :code-offset #xE0000)

(define-charset 'ethiopic
  "Ethiopic characters for Amharic and Tigrigna."
  :short-name "Ethiopic"
  :long-name "Ethiopic characters"
  :iso-final-char ?3
  :emacs-mule-id 245
  :supplementary-p t
  :unify-map "MULE-ethiopic"
  :code-space [33 126 33 126]
  :code-offset #x1A0000)

(define-charset 'mac-roman
  "Mac Roman charset"
  :short-name "Mac Roman"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "MACINTOSH")

;; Fixme: modern EBCDIC variants, e.g. IBM00924?
;; EBCDIC variants.  Each entry is (NAME DOCSTRING MAP ALIAS...).
;; Every one of these charsets has code space [0 255], uses its own
;; name as :mime-charset and the upcased name as :short-name, so both
;; are derived from NAME instead of being spelled out per charset.
;; (`ibm280' used to declare :mime-charset `ibm270' by mistake.)
;; After a charset is defined, each ALIAS is made to refer to it.
(dolist (spec '((ebcdic-us "US version of EBCDIC" "EBCDICUS")
                (ebcdic-uk "UK version of EBCDIC" "EBCDICUK")
                (ibm038 "International version of EBCDIC" "IBM038"
                        ebcdic-int cp038)
                (ibm256 "Netherlands version of EBCDIC" "IBM256")
                (ibm273 "Austrian / German version of EBCDIC" "IBM273")
                (ibm274 "Belgian version of EBCDIC" "IBM274")
                (ibm275 "Brazilian version of EBCDIC" "IBM275")
                (ibm277 "Danish / Norwegian version of EBCDIC" "IBM277")
                (ibm278 "Finnish / Swedish version of EBCDIC" "IBM278")
                (ibm280 "Italian version of EBCDIC" "IBM280")
                (ibm281 "Japanese-E version of EBCDIC" "IBM281")
                (ibm284 "Spanish version of EBCDIC" "IBM284")
                (ibm285 "UK english version of EBCDIC" "IBM285")
                (ibm290 "Japanese katakana version of EBCDIC" "IBM290")
                (ibm297 "French version of EBCDIC" "IBM297")
                ;; Says groff:
                (ibm1047
                 "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
                 "IBM1047"
                 cp1047)))
  (let ((charset (nth 0 spec)))
    (define-charset charset
      (nth 1 spec)
      :short-name (upcase (symbol-name charset))
      ;; A fresh vector per charset, as with separate literal forms.
      :code-space (vector 0 255)
      :mime-charset charset
      :map (nth 2 spec))
    (dolist (alias (nthcdr 3 spec))
      (define-charset-alias alias charset))))

(define-charset 'hp-roman8
  "Encoding used by Hewlett-Packard printer software"
  :short-name "HP-ROMAN8"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "HP-ROMAN8")

;; To make a coding system with this, a pre-write-conversion should
;; account for the commented-out multi-valued code points in
;; stdenc.map.
(define-charset 'adobe-standard-encoding
  "Adobe `standard encoding' used in PostScript"
  :short-name "ADOBE-STANDARD-ENCODING"
  :code-space [#x20 255]
  :map "stdenc")

(define-charset 'symbol
  "Adobe symbol encoding used in PostScript"
  :short-name "ADOBE-SYMBOL"
  :code-space [#x20 255]
  :map "symbol")

(define-charset 'ibm850
  "DOS codepage 850 (Latin-1)"
  :short-name "IBM850"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "IBM850")
(define-charset-alias 'cp850 'ibm850)

(define-charset 'mik
  "Bulgarian DOS codepage"
  :short-name "MIK"
  :ascii-compatible-p t
  :code-space [0 255]
  :map "MIK")

(define-charset 'ptcp154
  "ParaType codepage (Asian Cyrillic)"
  :short-name "PT154"
  :ascii-compatible-p t
  :code-space [0 255]
  :mime-charset 'pt154
  :map "PTCP154")
;; Both shorter names designate the charset just defined.
(dolist (alias '(pt154 cp154))
  (define-charset-alias alias 'ptcp154))

;; GB18030 is assembled from parts: a 2-byte charset and a 4-byte BMP
;; charset that are read from map files, and three 4-byte ranges that
;; are placed by :code-offset instead.  The `gb18030' charset further
;; down is declared as the superset of ASCII plus all five.
(define-charset 'gb18030-2-byte
  "GB18030 2-byte (0x814E..0xFEFE)"
  :code-space [#x40 #xFE #x81 #xFE]
  :supplementary-p t
  :map "GB180302")

(define-charset 'gb18030-4-byte-bmp
  "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
  :supplementary-p t
  :map "GB180304")

(define-charset 'gb18030-4-byte-smp
  "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
  :min-code '(#x9030 . #x8130)
  :max-code '(#xE332 . #x9A35)
  :supplementary-p t
  :code-offset #x10000)

(define-charset 'gb18030-4-byte-ext-1
  "GB18030 4-byte (0x8431A530-0x8F39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
  :min-code '(#x8431 . #xA530)
  :max-code '(#x8F39 . #xFE39)
  :supplementary-p t
  :code-offset #x200000                 ; ... #x22484B
  )

(define-charset 'gb18030-4-byte-ext-2
  "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
  :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
  :min-code '(#xE332 . #x9A36)
  :max-code '(#xFE39 . #xFE39)
  :supplementary-p t
  :code-offset #x22484C                 ; ... #x279f93
  )

(define-charset 'gb18030
  "GB18030"
  :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
  :min-code 0
  :max-code '(#xFE39 . #xFE39)
  :superset '(ascii gb18030-2-byte
                    gb18030-4-byte-bmp gb18030-4-byte-smp
                    gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))

(define-charset 'chinese-cns11643-15
  "CNS11643 Plane 15 Chinese Traditional"
  :short-name "CNS11643-15"
  :long-name "CNS11643-15 (Chinese traditional)"
  :code-space [33 126 33 126]
  :code-offset #x27A000
  :unify-map "CNS-F")

;; Unify these charsets, one `unify-charset' call per charset, in the
;; order listed.
(dolist (charset '(chinese-gb2312
                   chinese-gbk
                   chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-cns11643-3
                   chinese-cns11643-4
                   chinese-cns11643-5
                   chinese-cns11643-6
                   chinese-cns11643-7
                   chinese-cns11643-15
                   big5
                   chinese-big5-1
                   chinese-big5-2
                   big5-hkscs
                   korean-ksc5601
                   vietnamese-viscii-lower
                   vietnamese-viscii-upper
                   chinese-sisheng
                   ipa
                   tibetan
                   ethiopic
                   indian-is13194
                   japanese-jisx0208-1978
                   japanese-jisx0208
                   japanese-jisx0212
                   japanese-jisx0213-1
                   japanese-jisx0213-2))
  (unify-charset charset))


;; These are tables for translating characters on decoding and
;; encoding.
;; Fixme: these aren't used now -- should they be?
(setq standard-translation-table-for-decode nil
      standard-translation-table-for-encode nil)

;;; Make fundamental coding systems.

;; The coding system `no-conversion' and `undecided' are already
;; defined in coding.c as below:
;;
;; (define-coding-system 'no-conversion
;;   "..."
;;   :coding-type 'raw-text
;;   ...)
;; (define-coding-system 'undecided
;;   "..."
;;   :coding-type 'undecided
;;   ...)

;; Convenience names, as (ALIAS . CODING-SYSTEM) pairs.
(dolist (pair '((binary . no-conversion)
                (unix . undecided-unix)
                (dos . undecided-dos)
                (mac . undecided-mac)))
  (define-coding-system-alias (car pair) (cdr pair)))

(define-coding-system 'prefer-utf-8
  "Like `undecided' but prefer UTF-8 when appropriate.
On decoding, if the source contains 8-bit codes and they all
are valid UTF-8 sequences, detect the source as UTF-8 encoding
regardless of the coding priority.
On encoding, if the source contains non-ASCII characters, encode them
by UTF-8."
  :coding-type 'undecided
  :mnemonic ?-
  :charset-list '(emacs)
  :prefer-utf-8 t
  :inhibit-null-byte-detection 0
  :inhibit-iso-escape-detection 0)

(define-coding-system 'raw-text
  "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.

When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
  :coding-type 'raw-text
  :for-unibyte t
  :mnemonic ?t)

(define-coding-system 'no-conversion-multibyte
  "Like `no-conversion' but don't read a file into a unibyte buffer."
  :coding-type 'raw-text
  :eol-type 'unix
  :mnemonic ?=)

(define-coding-system 'iso-latin-1
  "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
  :coding-type 'charset
  :mnemonic ?1
  :charset-list '(iso-8859-1)
  :mime-charset 'iso-8859-1)

(dolist (alias '(iso-8859-1 latin-1))
  (define-coding-system-alias alias 'iso-latin-1))

;; Coding systems not specific to each language environment.

(define-coding-system 'emacs-mule
  "Emacs 21 internal format used in buffer and string."
  :coding-type 'emacs-mule
  :charset-list 'emacs-mule
  :mnemonic ?M)

;; The three UTF-8 flavors below differ only in their :bom property:
;; absent, t, or a pair of coding systems.
(define-coding-system 'utf-8
  "UTF-8 (no signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :mime-charset 'utf-8)

(define-coding-system 'utf-8-with-signature
  "UTF-8 (with signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t)

(define-coding-system 'utf-8-auto
  "UTF-8 (auto-detect signature (BOM))"
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-8-with-signature . utf-8))

(define-coding-system-alias 'mule-utf-8 'utf-8)
;; See this page:
;; https://docs.microsoft.com/en-us/windows/desktop/intl/code-page-identifiers
;; Starting with Windows 10, people are trying to set their systems to
;; use UTF-8, so we had better recognize this alias:
(define-coding-system-alias 'cp65001 'utf-8)

(define-coding-system 'utf-8-emacs
  "Support for all Emacs characters (including non-Unicode characters)."
  :coding-type 'utf-8
  :mnemonic ?U
  :charset-list '(emacs))

;; The encoding used internally. This encoding is meant to be able to save
;; any multibyte buffer without losing information. It can change between
;; Emacs releases, tho, so should only be used for internal files.
(define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)

;; UTF-16 family.  All five share :coding-type `utf-16' and are marked
;; :mime-text-unsuitable; they differ in :endian, :bom and
;; :mime-charset.
(define-coding-system 'utf-16le
  "UTF-16LE (little endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16le)

(define-coding-system 'utf-16be
  "UTF-16BE (big endian, no signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16be)

(define-coding-system 'utf-16le-with-signature
  "UTF-16 (little endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'little
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

(define-coding-system 'utf-16be-with-signature
  "UTF-16 (big endian, with signature (BOM))."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom t
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

(define-coding-system 'utf-16
  "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
  :coding-type 'utf-16
  :mnemonic ?U
  :charset-list '(unicode)
  :bom '(utf-16le-with-signature . utf-16be-with-signature)
  :endian 'big
  :mime-text-unsuitable t
  :mime-charset 'utf-16)

;; Backwards compatibility (old names, also used by Mule-UCS). We
;; prefer the MIME names.
(dolist (pair '((utf-16-le . utf-16le-with-signature)
                (utf-16-be . utf-16be-with-signature)))
  (define-coding-system-alias (car pair) (cdr pair)))


;; ISO 2022 family.
(define-coding-system 'iso-2022-7bit
  "ISO 2022 based 7-bit encoding using only G0."
  :coding-type 'iso-2022
  :mnemonic ?J
  :charset-list 'iso-2022
  :designation [(ascii t) nil nil nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))

(define-coding-system 'iso-2022-7bit-ss2
  "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?$
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit
                 designation single-shift composition))

(define-coding-system 'iso-2022-7bit-lock
  "ISO-2022 coding system using Locking-Shift for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?&
  :charset-list 'iso-2022
  :designation [(ascii 94) (nil 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl 7-bit
                        designation locking-shift composition))

(define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)

(define-coding-system 'iso-2022-7bit-lock-ss2
  "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
  :coding-type 'iso-2022
  :mnemonic ?i
  :charset-list '(ascii
                  japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                  korean-ksc5601
                  chinese-gb2312
                  chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                  chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                  chinese-cns11643-7)
  :designation [(ascii 94)
                (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                (nil chinese-cns11643-2)
                (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                     chinese-cns11643-6 chinese-cns11643-7)]
  :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
                 single-shift init-bol))

(define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)

(define-coding-system 'iso-2022-8bit-ss2
  "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
  :coding-type 'iso-2022
  :mnemonic ?@
  :charset-list 'iso-2022
  :designation [(ascii 94) nil (nil 96) nil]
  :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))

;; Compound text family.  The three definitions share their
;; :designation and differ in :flags, in the conversion hooks and in
;; whether a :mime-charset is given.
(define-coding-system 'compound-text
  "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
                        designation locking-shift single-shift composition)
  ;; Fixme: this isn't a valid MIME charset and has to be
  ;; special-cased elsewhere -- fx
  :mime-charset 'x-ctext)

(dolist (alias '(x-ctext ctext))
  (define-coding-system-alias alias 'compound-text))

;; Same as compound-text, but doesn't produce composition escape
;; sequences. Used in post-read and pre-write conversions of
;; compound-text-with-extensions, see mule.el. Note that this should
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
  "Compound text based generic encoding.

Like `compound-text', but does not produce escape sequences for compositions."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl
                        designation locking-shift single-shift))

(define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.

See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.

This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
  :coding-type 'iso-2022
  :mnemonic ?x
  :charset-list 'iso-2022
  :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
  :flags '(ascii-at-eol ascii-at-cntl long-form
                        designation locking-shift single-shift)
  :post-read-conversion 'ctext-post-read-conversion
  :pre-write-conversion 'ctext-pre-write-conversion
  :mime-charset 'x-ctext)

(dolist (alias '(x-ctext-with-extensions ctext-with-extensions))
  (define-coding-system-alias alias 'compound-text-with-extensions))

(define-coding-system 'us-ascii
  "Encode ASCII as-is and encode non-ASCII characters to `?'."
  :coding-type 'charset
  :mnemonic ?-
  :charset-list '(ascii)
  :default-char ??
  :mime-charset 'us-ascii)

(dolist (alias '(iso-safe ascii))
  (define-coding-system-alias alias 'us-ascii))

(define-coding-system 'utf-7
  "UTF-7 encoding of Unicode (RFC 2152)."
  :coding-type 'utf-8
  :mnemonic ?U
  :mime-charset 'utf-7
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-pre-write-conversion
  :post-read-conversion 'utf-7-post-read-conversion)
;; FIXME: 'define-coding-system' automatically sets :ascii-compatible-p,
;; to any encoding whose :coding-type is 'utf-8', but UTF-7 is not ASCII
;; compatible, so we override that here (bug#40407).
(coding-system-put 'utf-7 :ascii-compatible-p nil)

(define-coding-system 'utf-7-imap
  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
  :coding-type 'utf-8
  :mnemonic ?u
  :charset-list '(unicode)
  :pre-write-conversion 'utf-7-imap-pre-write-conversion
  :post-read-conversion 'utf-7-imap-post-read-conversion)
;; See comment for utf-7 above.
(coding-system-put 'utf-7-imap :ascii-compatible-p nil)

;; Use us-ascii for terminal output if some other coding system is not
;; specified explicitly.
(set-safe-terminal-coding-system-internal 'us-ascii)

;; The other coding-systems are defined in each language specific
;; files under lisp/language.

;; Normally, set coding system to `undecided' before reading a file.
;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
;; but we regard them as containing multibyte characters.
;; Tar files are not decoded at all, but we treat them as raw bytes.

;; Each element is (REGEXP . VALUE).  The `mapcar' rebuilds every
;; element with its REGEXP passed through `purecopy'; VALUE is kept
;; as written.
(setq file-coding-system-alist
      (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
              '(("\\.elc\\'" . utf-8-emacs)
                ("\\.el\\'" . prefer-utf-8)
                ("\\.utf\\(-8\\)?\\'" . utf-8)
                ("\\.xml\\'" . xml-find-file-coding-system)
                ;; We use raw-text for reading loaddefs.el so that if it
                ;; happens to have DOS or Mac EOLs, they are converted to
                ;; newlines.  This is required to make the special treatment
                ;; of the "\ newline" combination in loaddefs.el, which marks
                ;; the beginning of a doc string, work.
                ;; The dot before "el" is escaped so that it matches only a
                ;; literal period, like in every other entry of this list.
                ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                ("\\.tar\\'" . (no-conversion . no-conversion))
                ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                ;; Catch-all: the empty regexp matches every file name.
                ("" . (undecided . nil)))))


;;; Setting coding categories and their priorities.

;; This setting is just to read an Emacs Lisp source files which
;; contain multilingual text while dumping Emacs. More appropriate
;; values are set by the command `set-language-environment' for each
;; language environment.

(set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)


;;; Miscellaneous settings.

;; Make all multibyte characters self-insert.
(set-char-table-range (nth 1 global-map)
                      (cons 128 (max-char))
                      'self-insert-command)

;; Flag the codes ?\221 through ?\226 in `latin-extra-code-table'.
(dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
  (aset latin-extra-code-table code t))

(defcustom password-word-equivalents
  '("password" "passcode" "passphrase" "pass phrase" "pin"
    "decryption key" "encryption key" ; From ccrypt.
    ;; These are sorted according to the GNU en_US locale.
    "암호"		; ko
    "パスワード"	; ja
    "ପ୍ରବେଶ ସଙ୍କେତ"	; or
    "ពាក្យសម្ងាត់"		; km
    "adgangskode"	; da
    "contraseña"	; es
    "contrasenya"	; ca
    "geslo"		; sl
    "hasło"		; pl
    "heslo"		; cs, sk
    "iphasiwedi"	; zu
    "jelszó"		; hu
    "lösenord"		; sv
    "lozinka"		; hr, sr
    "mật khẩu"		; vi
    "mot de passe"	; fr
    "parola"		; tr
    "pasahitza"		; eu
    "passord"		; nb
    "passwort"		; de
    "pasvorto"		; eo
    "salasana"		; fi
    "senha"		; pt
    "slaptažodis"	; lt
    "wachtwoord"	; nl
    "كلمة السر"		; ar
    "ססמה"		; he
    "лозинка"		; sr
    "пароль"		; kk, ru, uk
    "गुप्तशब्द"		; mr
    "शब्दकूट"		; hi
    "પાસવર્ડ"		; gu
    "సంకేతపదము"		; te
    "ਪਾਸਵਰਡ"		; pa
    "ಗುಪ್ತಪದ"		; kn
    "கடவுச்சொல்"		; ta
    "അടയാളവാക്ക്"		; ml
    "গুপ্তশব্দ"		; as
    "পাসওয়ার্ড"		; bn_IN
    "රහස්පදය"		; si
    "密码"		; zh_CN
    "密碼"		; zh_TW
    )
  "List of words equivalent to \"password\".
This is used by Shell mode and other parts of Emacs to recognize
password prompts, including prompts in languages other than
English. Different case choices should not be assumed to be
included; callers should bind `case-fold-search' to t."
  :type '(repeat string)
  :version "27.1"
  :group 'processes)

;; The old code-pages library is obsoleted by coding systems based on
;; the charsets defined in this file but might be required by user
;; code.
(provide 'code-pages)

;;; mule-conf.el ends here