1;;; cyrillic.el --- support for Cyrillic -*- coding: utf-8; -*-
2
3;; Copyright (C) 1997-1998, 2001-2021 Free Software Foundation, Inc.
4;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
6;;   National Institute of Advanced Industrial Science and Technology (AIST)
7;;   Registration Number H14PRO021
8;; Copyright (C) 2003
9;;   National Institute of Advanced Industrial Science and Technology (AIST)
10;;   Registration Number H13PRO009
11
12;; Author: Kenichi Handa <handa@gnu.org>
13;; Keywords: multilingual, Cyrillic, i18n
14
15;; This file is part of GNU Emacs.
16
17;; GNU Emacs is free software: you can redistribute it and/or modify
18;; it under the terms of the GNU General Public License as published by
19;; the Free Software Foundation, either version 3 of the License, or
20;; (at your option) any later version.
21
22;; GNU Emacs is distributed in the hope that it will be useful,
23;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25;; GNU General Public License for more details.
26
27;; You should have received a copy of the GNU General Public License
28;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
29
30;;; Commentary:
31
32;; The character set ISO8859-5 is supported.  KOI-8 and ALTERNATIVNYJ
33;; are converted to Unicode internally.  See
34;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>.  For more info
35;; on Cyrillic charsets, see
36;; <URL:http://czyborra.com/charsets/cyrillic.html>.  The KOI and
37;; Alternativnyj coding systems should live in code-pages.el, but
38;; they've always been preloaded and the coding system autoload
39;; mechanism didn't get accepted, so they have to stay here and
40;; duplicate code-pages stuff.
41
42;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
43;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
;; c.f. U+040D) and 253 (section sign, c.f. U+045D).  The KOI-8 and
45;; Alternativnyj coding systems encode both 8859-5 and Unicode.
46;; ucs-tables.el provides unification for cyrillic-iso-8bit.
47
48;; Customizing `utf-fragment-on-decoding' allows decoding characters
49;; from KOI and Alternativnyj into 8859-5 where that's possible.
50;; cyrillic-iso8859-5 characters take half as much space in the buffer
51;; as the mule-unicode-0100-24ff equivalents, though that's probably
52;; not normally a big deal.
53
54;;; Code:
55
56;; Cyrillic (general)
57
58;; ISO-8859-5 stuff
59
;; Coding system for ISO-8859-5.  It is of the `charset' type and
;; covers exactly one charset, `iso-8859-5'.
(define-coding-system 'cyrillic-iso-8bit
  "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
  :mnemonic ?5
  :mime-charset 'iso-8859-5
  :coding-type 'charset
  :charset-list '(iso-8859-5))

;; Make the coding system reachable under its MIME name as well.
(define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
68
;; Language environment for ISO-8859-5 users, filed under the
;; "Cyrillic" parent group.
(set-language-info-alist
 "Cyrillic-ISO"
 '(;; Charset and coding systems.
   (charset iso-8859-5)
   (coding-system cyrillic-iso-8bit)
   (coding-priority cyrillic-iso-8bit)
   (nonascii-translation . iso-8859-5)
   (unibyte-display . cyrillic-iso-8bit)
   ;; Default input method and the feature listed for this environment.
   (input-method . "cyrillic-yawerty")  ; fixme
   (features cyril-util)
   ;; Text presented to the user.
   (sample-text . "Russian (Русский)	Здравствуйте!")
   (documentation . "Support for Cyrillic ISO-8859-5."))
 '("Cyrillic"))
80
81;; KOI-8R stuff
82
(define-coding-system 'cyrillic-koi8
  "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
  ;; The mnemonic was once ?K.  That letter is, strictly speaking,
  ;; the more correct one, but Korean uses it too.  People using koi8
  ;; for languages other than Russian will have to forgive the ?R.
  :mnemonic ?R
  :mime-charset 'koi8-r
  :coding-type 'charset
  :charset-list '(koi8))

;; Additional names that all resolve to `cyrillic-koi8'.
(dolist (alias '(koi8-r koi8 cp878))
  (define-coding-system-alias alias 'cyrillic-koi8))
96
;; Language environment centred on the KOI8 coding system, filed
;; under the "Cyrillic" parent group.
(set-language-info-alist
 "Cyrillic-KOI8"
 '(;; Charset and coding systems.  The coding priority lists KOI8
   ;; ahead of ISO-8859-5.
   (charset koi8)
   (coding-system cyrillic-koi8)
   (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
   (ctext-non-standard-encodings "koi8-r")
   (nonascii-translation . koi8)
   (unibyte-display . cyrillic-koi8)
   ;; Default input method and the feature listed for this environment.
   (input-method . "russian-typewriter")
   (features cyril-util)
   ;; Text presented to the user.
   (sample-text . "Russian (Русский)	Здравствуйте!")
   (documentation . "Support for Cyrillic KOI8-R."))
 '("Cyrillic"))
109
;; The "Russian" language environment.  Its nonascii-translation
;; value is not a literal: it is whatever `translation-table'
;; property the symbol `cyrillic-koi8-r-nonascii-translation-table'
;; carries when this form is evaluated, so that value is fetched
;; first and then spliced into the alist.
(let ((koi8-r-table (get 'cyrillic-koi8-r-nonascii-translation-table
                         'translation-table)))
  (set-language-info-alist
   "Russian"
   `((charset cyrillic-iso8859-5)
     (nonascii-translation . ,koi8-r-table)
     ;; KOI8 is the coding system of choice; the coding priority
     ;; lists ISO-8859-5 after it.
     (coding-system cyrillic-koi8)
     (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
     (unibyte-display . cyrillic-koi8)
     (input-method . "russian-computer")
     (features cyril-util)
     (sample-text . "Russian (Русский)	Здравствуйте!")
     (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
     (tutorial . "TUTORIAL.ru"))
   '("Cyrillic")))
125
;; KOI8-U coding system.  It is defined here, ahead of the
;; "Ukrainian" language environment that names it.
(define-coding-system 'koi8-u
  "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
  :mnemonic ?U
  :mime-charset 'koi8-u
  :coding-type 'charset
  :charset-list '(koi8-u))
132
;; Language environment for Ukrainian, built on KOI8-U and filed
;; under the "Cyrillic" parent group.
(set-language-info-alist
 "Ukrainian"
 '(;; Charset and coding systems.
   (charset koi8-u)
   (coding-system koi8-u)
   (coding-priority koi8-u)
   (nonascii-translation . koi8-u)
   ;; Default input method.
   (input-method . "ukrainian-computer")
   ;; Text presented to the user.
   (documentation . "Support for Ukrainian with KOI8-U character set."))
 '("Cyrillic"))
142
143;;; ALTERNATIVNYJ stuff
144
;; ALTERNATIVNYJ coding system.  No :mime-charset is given for it.
(define-coding-system 'cyrillic-alternativnyj
  "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
  :mnemonic ?A
  :coding-type 'charset
  :charset-list '(alternativnyj))

;; Short name for the same coding system.
(define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
152
;; Language environment for the ALTERNATIVNYJ encoding, filed under
;; the "Cyrillic" parent group.
(set-language-info-alist
 "Cyrillic-ALT"
 '(;; Charset and coding systems.
   (charset alternativnyj)
   (coding-system cyrillic-alternativnyj)
   (coding-priority cyrillic-alternativnyj)
   (nonascii-translation . alternativnyj)
   (unibyte-display . cyrillic-alternativnyj)
   ;; Default input method and the feature listed for this environment.
   (input-method . "russian-typewriter")
   (features cyril-util)
   ;; Text presented to the user.
   (sample-text . "Russian (Русский)	Здравствуйте!")
   (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
 '("Cyrillic"))
164
;; CP866 coding system, backed by the `ibm866' charset.
(define-coding-system 'cp866
  "CP866 encoding for Cyrillic."
  :mnemonic ?*
  :mime-charset 'cp866
  :coding-type 'charset
  :charset-list '(ibm866))
171
;; Note: `koi8-u' is not defined here.  Its definition lives earlier
;; in this file, immediately before the "Ukrainian" language
;; environment that refers to it; defining it a second time with
;; identical properties would only redo the same registration.
178
;; KOI8-T coding system; the "Tajik" language environment names it.
(define-coding-system 'koi8-t
  "KOI8-T 8-bit encoding for Cyrillic"
  :mnemonic ?*
  :mime-charset 'koi8-t
  :coding-type 'charset
  :charset-list '(koi8-t))
185
;; windows-1251 coding system; the "Bulgarian" and "Belarusian"
;; language environments name it.
(define-coding-system 'windows-1251
  "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
  :mnemonic ?b
  :mime-charset 'windows-1251
  :coding-type 'charset
  :charset-list '(windows-1251))

;; Code-page style name for the same coding system.
(define-coding-system-alias 'cp1251 'windows-1251)
193
;; cp1125 coding system.  No :mime-charset is given for it; it is
;; also reachable through the two aliases below.
(define-coding-system 'cp1125
  "cp1125 8-bit encoding for Cyrillic"
  :mnemonic ?*
  :coding-type 'charset
  :charset-list '(cp1125))

(define-coding-system-alias 'ruscii 'cp1125)
;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
(define-coding-system-alias 'cp866u 'cp1125)
202
;; DOS code page 855, also registered under the name `ibm855'.
(define-coding-system 'cp855
  "DOS codepage 855 (Russian)"
  :mnemonic ?D
  :mime-charset 'cp855
  :coding-type 'charset
  :charset-list '(cp855))

(define-coding-system-alias 'ibm855 'cp855)
210
;; MIK coding system; it has no :mime-charset and shares the ?D
;; mnemonic with the other DOS-style code pages in this file.
(define-coding-system 'mik
  "Bulgarian DOS codepage"
  :mnemonic ?D
  :coding-type 'charset
  :charset-list '(mik))
216
;; PT154 coding system; no :mime-charset is given for it.
(define-coding-system 'pt154
  "ParaType Asian Cyrillic codepage"
  :mnemonic ?D
  :coding-type 'charset
  :charset-list '(pt154))
222
223;; (set-language-info-alist
224;;  "Windows-1251" `((coding-system windows-1251)
225;; 		  (coding-priority windows-1251)
226;; 		  (input-method . "russian-typewriter") ; fixme?
227;; 		  (features code-pages)
228;; 		  (documentation . "Support for windows-1251 character set."))
229;;  '("Cyrillic"))
230
;; Language environment for Tajik, built on KOI8-T and filed under
;; the "Cyrillic" parent group.
(set-language-info-alist
 "Tajik"
 '(;; Charset and coding systems.
   (charset koi8-t)
   (coding-system koi8-t)
   (coding-priority koi8-t)
   (nonascii-translation . cyrillic-koi8-t)
   ;; Default input method and the feature listed for this environment.
   (input-method . "russian-typewriter") ; fixme?
   (features code-pages)
   ;; Text presented to the user.
   (documentation . "Support for Tajik using KOI8-T."))
 '("Cyrillic"))
240
;; Language environment for Bulgarian, built on windows-1251 and
;; filed under the "Cyrillic" parent group.
(set-language-info-alist
 "Bulgarian"
 '(;; Charset and coding systems.
   (charset windows-1251)
   (coding-system windows-1251)
   (coding-priority windows-1251)
   (nonascii-translation . windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   ;; Default input method.
   (input-method . "bulgarian-bds")
   ;; Text presented to the user.
   (documentation
    . "Support for Bulgarian with windows-1251 character set."))
 '("Cyrillic"))
251
;; Language environment for Belarusian, built on windows-1251 and
;; filed under the "Cyrillic" parent group.  The documentation
;; string spans two lines; the backslash before the parenthesis that
;; opens its second line keeps that parenthesis out of column 0
;; unescaped and does not become part of the string.
(set-language-info-alist
 "Belarusian"
 '(;; Charset and coding systems.
   (charset windows-1251)
   (coding-system windows-1251)
   (coding-priority windows-1251)
   (nonascii-translation . windows-1251)
   (ctext-non-standard-encodings "microsoft-cp1251")
   ;; Default input method.
   (input-method . "belarusian")
   ;; Text presented to the user.
   (documentation
    . "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
 '("Cyrillic"))
263
;; Note: the "Ukrainian" language environment is not registered
;; here.  The complete registration (charset, coding system, coding
;; priority, nonascii-translation, input method and documentation)
;; is made earlier in this file, right after the `koi8-u' coding
;; system.  Language info is stored per key, so repeating a subset
;; of those keys here would only overwrite values already set.
271
272(provide 'cyrillic)
273
274;;; cyrillic.el ends here
275