;;; cyrillic.el --- support for Cyrillic -*- coding: utf-8; -*-

;; Copyright (C) 1997-1998, 2001-2021 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021
;; Copyright (C) 2003
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009

;; Author: Kenichi Handa <handa@gnu.org>
;; Keywords: multilingual, Cyrillic, i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; The character set ISO8859-5 is supported.  KOI-8 and ALTERNATIVNYJ
;; are converted to Unicode internally.  See
;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>.  For more info
;; on Cyrillic charsets, see
;; <URL:http://czyborra.com/charsets/cyrillic.html>.  The KOI and
;; Alternativnyj coding systems should live in code-pages.el, but
;; they've always been preloaded and the coding system autoload
;; mechanism didn't get accepted, so they have to stay here and
;; duplicate code-pages stuff.

;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
;; c.f.
;; U+040D) and 253 (section sign, c.f U+045D).  The KOI-8 and
;; Alternativnyj coding systems encode both 8859-5 and Unicode.
;; ucs-tables.el provides unification for cyrillic-iso-8bit.

;; Customizing `utf-fragment-on-decoding' allows decoding characters
;; from KOI and Alternativnyj into 8859-5 where that's possible.
;; cyrillic-iso8859-5 characters take half as much space in the buffer
;; as the mule-unicode-0100-24ff equivalents, though that's probably
;; not normally a big deal.

;;; Code:

;; Cyrillic (general)

;; ISO-8859-5 stuff

;; Charset-based coding system over the `iso-8859-5' charset; also
;; reachable through the alias `iso-8859-5'.
(define-coding-system 'cyrillic-iso-8bit
  "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
  :coding-type 'charset
  :mnemonic ?5
  :charset-list '(iso-8859-5)
  :mime-charset 'iso-8859-5)

(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)

;; Language environment "Cyrillic-ISO", listed under the "Cyrillic"
;; parent group like every other environment in this file.
(set-language-info-alist
 "Cyrillic-ISO" '((charset iso-8859-5)
                  (coding-system cyrillic-iso-8bit)
                  (coding-priority cyrillic-iso-8bit)
                  (input-method . "cyrillic-yawerty") ; fixme
                  (nonascii-translation . iso-8859-5)
                  (unibyte-display . cyrillic-iso-8bit)
                  (features cyril-util)
                  (sample-text . "Russian (Русский) Здравствуйте!")
                  (documentation . "Support for Cyrillic ISO-8859-5."))
 '("Cyrillic"))

;; KOI-8R stuff

(define-coding-system 'cyrillic-koi8
  "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
  :coding-type 'charset
  ;; We used to use ?K.  It is true that ?K is more strictly correct,
  ;; but it is also used for Korean.  So people who use koi8 for
  ;; languages other than Russian will have to forgive us.
  :mnemonic ?R
  :charset-list '(koi8)
  :mime-charset 'koi8-r)

;; Alternative names under which `cyrillic-koi8' can be requested.
(define-coding-system-alias 'koi8-r 'cyrillic-koi8)
(define-coding-system-alias 'koi8 'cyrillic-koi8)
(define-coding-system-alias 'cp878 'cyrillic-koi8)

(set-language-info-alist
 "Cyrillic-KOI8" '((charset koi8)
                   (coding-system cyrillic-koi8)
                   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
                   (ctext-non-standard-encodings "koi8-r")
                   (nonascii-translation . koi8)
                   (input-method . "russian-typewriter")
                   (features cyril-util)
                   (unibyte-display . cyrillic-koi8)
                   (sample-text . "Russian (Русский) Здравствуйте!")
                   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))

;; The alist below is backquoted because the `nonascii-translation'
;; value is computed when this form is evaluated: it is whatever the
;; `translation-table' property of the symbol
;; `cyrillic-koi8-r-nonascii-translation-table' holds at that moment.
(set-language-info-alist
 "Russian" `((charset cyrillic-iso8859-5)
             (nonascii-translation
              . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                      'translation-table))
             (coding-system cyrillic-koi8)
             (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
             (input-method . "russian-computer")
             (features cyril-util)
             (unibyte-display . cyrillic-koi8)
             (sample-text . "Russian (Русский) Здравствуйте!")
             (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
             (tutorial . "TUTORIAL.ru"))
 '("Cyrillic"))

;; KOI8-U stuff

;; `koi8-u' is defined exactly once in this file.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :coding-type 'charset
  :mnemonic ?U
  :charset-list '(koi8-u)
  :mime-charset 'koi8-u)

;; Every "Ukrainian" property is registered in this single call, so
;; the entry cannot drift out of sync with a second registration.
(set-language-info-alist
 "Ukrainian" '((charset koi8-u)
               (coding-system koi8-u)
               (coding-priority koi8-u)
               (nonascii-translation . koi8-u)
               (input-method . "ukrainian-computer")
               (documentation
                . "Support for Ukrainian with koi8-u character set."))
 '("Cyrillic"))

;;; ALTERNATIVNYJ stuff

;; Unlike most coding systems in this file, this one declares no
;; `:mime-charset'.
(define-coding-system 'cyrillic-alternativnyj
  "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?A
  :charset-list '(alternativnyj))

(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)

(set-language-info-alist
 "Cyrillic-ALT" '((charset alternativnyj)
                  (coding-system cyrillic-alternativnyj)
                  (coding-priority cyrillic-alternativnyj)
                  (nonascii-translation . alternativnyj)
                  (input-method . "russian-typewriter")
                  (features cyril-util)
                  (unibyte-display . cyrillic-alternativnyj)
                  (sample-text . "Russian (Русский) Здравствуйте!")
                  (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))

;;; Other Cyrillic code pages

(define-coding-system 'cp866
  "CP866 encoding for Cyrillic."
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(ibm866)
  :mime-charset 'cp866)

(define-coding-system 'koi8-t
  "KOI8-T 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(koi8-t)
  :mime-charset 'koi8-t)

(define-coding-system 'windows-1251
  "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
  :coding-type 'charset
  :mnemonic ?b
  :charset-list '(windows-1251)
  :mime-charset 'windows-1251)
(define-coding-system-alias 'cp1251 'windows-1251)

(define-coding-system 'cp1125
  "cp1125 8-bit encoding for Cyrillic"
  :coding-type 'charset
  :mnemonic ?*
  :charset-list '(cp1125))
(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)

(define-coding-system 'cp855
  "DOS codepage 855 (Russian)"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(cp855)
  :mime-charset 'cp855)
(define-coding-system-alias 'ibm855 'cp855)

(define-coding-system 'mik
  "Bulgarian DOS codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(mik))

(define-coding-system 'pt154
  "ParaType Asian Cyrillic codepage"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(pt154))

;; (set-language-info-alist
;;  "Windows-1251" `((coding-system windows-1251)
;;                   (coding-priority windows-1251)
;;                   (input-method . "russian-typewriter") ; fixme?
;;                   (features code-pages)
;;                   (documentation . "Support for windows-1251 character set."))
;;  '("Cyrillic"))

;; NOTE(review): the other entries in this file give the name used in
;; their `charset' entry as `nonascii-translation' (e.g. koi8-u,
;; windows-1251), whereas this one gives `cyrillic-koi8-t', which is
;; not defined anywhere in this file -- confirm that it names a
;; charset or translation table, or whether `koi8-t' was intended.
(set-language-info-alist
 "Tajik" '((coding-system koi8-t)
           (coding-priority koi8-t)
           (nonascii-translation . cyrillic-koi8-t)
           (charset koi8-t)
           (input-method . "russian-typewriter") ; fixme?
           (features code-pages)
           (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))

(set-language-info-alist
 "Bulgarian" '((coding-system windows-1251)
               (coding-priority windows-1251)
               (nonascii-translation . windows-1251)
               (charset windows-1251)
               (ctext-non-standard-encodings "microsoft-cp1251")
               (input-method . "bulgarian-bds")
               (documentation
                . "Support for Bulgarian with windows-1251 character set."))
 '("Cyrillic"))

(set-language-info-alist
 "Belarusian" '((coding-system windows-1251)
                (coding-priority windows-1251)
                (nonascii-translation . windows-1251)
                (charset windows-1251)
                (ctext-non-standard-encodings "microsoft-cp1251")
                (input-method . "belarusian")
                (documentation
                 . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))

(provide 'cyrillic)

;;; cyrillic.el ends here