1;;; indian.el --- Indian languages support -*- coding: utf-8; lexical-binding: t; -*-
2
3;; Copyright (C) 1997, 1999, 2001-2021 Free Software Foundation, Inc.
4;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5;;   National Institute of Advanced Industrial Science and Technology (AIST)
6;;   Registration Number H14PRO021
7
8;; Keywords: 	multilingual, i18n, Indian
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software: you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
24
25;;; Commentary:
26
27;; This file contains definitions of Indian language environments, and
28;; setups for displaying the scripts used there.
29
30;;; Code:
31
;; Coding system for text encoded with the `indian-is13194' charset.
;; It is declared as an ISO-2022 variant whose designation vector
;; names `ascii' first and `indian-is13194' second, leaving the
;; remaining two slots unused.  The post-read and pre-write
;; conversion functions named below are not defined in this file.
(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  :mnemonic ?D
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)

;; Make `devanagari' a shorter alias for the coding system above.
(define-coding-system-alias 'devanagari 'in-is13194-devanagari)
42
;; Register the "Devanagari" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "devanagari-aiba" as the input method.
(set-language-info-alist
 "Devanagari" '((charset unicode)
		(coding-system utf-8)
		(coding-priority utf-8)
		(input-method . "devanagari-aiba")
		(documentation . "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment."))
 '("Indian"))
52
;; Register the "Bengali" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "bengali-itrans" as the input method.
(set-language-info-alist
 "Bengali" '((charset unicode)
	     (coding-system utf-8)
	     (coding-priority utf-8)
	     (input-method . "bengali-itrans")
	     (documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
 '("Indian"))
62
;; Register the "Punjabi" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "punjabi-itrans" as the input method.
(set-language-info-alist
 "Punjabi" '((charset unicode)
	      (coding-system utf-8)
	      (coding-priority utf-8)
	      (input-method . "punjabi-itrans")
	      (documentation . "\
North Indian language Punjabi is supported in this language environment."))
 '("Indian"))
71
;; Register the "Gujarati" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "gujarati-itrans" as the input method.
(set-language-info-alist
 "Gujarati" '((charset unicode)
	      (coding-system utf-8)
	      (coding-priority utf-8)
	      (input-method . "gujarati-itrans")
	      (documentation . "\
North Indian language Gujarati is supported in this language environment."))
 '("Indian"))
80
;; Register the "Oriya" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and "oriya-itrans"
;; as the input method.
(set-language-info-alist
 "Oriya" '((charset unicode)
	      (coding-system utf-8)
	      (coding-priority utf-8)
	      (input-method . "oriya-itrans")
	      (documentation . "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment."))
 '("Indian"))
90
;; Register the "Tamil" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and "tamil-itrans"
;; as the input method.
(set-language-info-alist
 "Tamil" '((charset unicode)
	   (coding-system utf-8)
	   (coding-priority utf-8)
	   (input-method . "tamil-itrans")
	   (documentation . "\
South Indian Language Tamil is supported in this language environment."))
 '("Indian"))
99
;; Register the "Telugu" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and "telugu-itrans"
;; as the input method.
(set-language-info-alist
 "Telugu" '((charset unicode)
	    (coding-system utf-8)
	    (coding-priority utf-8)
	    (input-method . "telugu-itrans")
	    (documentation . "\
South Indian Language Telugu is supported in this language environment."))
 '("Indian"))
108
;; Register the "Kannada" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "kannada-itrans" as the input method.  The coding system is
;; spelled `utf-8', as in every other environment in this file,
;; rather than through the legacy name `mule-utf-8'.
(set-language-info-alist
 "Kannada" '((charset unicode)
	     (coding-system utf-8)
	     (coding-priority utf-8)
	     (input-method . "kannada-itrans")
	     (sample-text . "Kannada (ಕನ್ನಡ)	ನಮಸ್ಕಾರ")
	     (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))
119
;; Register the "Malayalam" language environment under the "Indian"
;; parent: Unicode charset, UTF-8 coding system, and
;; "malayalam-itrans" as the input method.
(set-language-info-alist
 "Malayalam" '((charset unicode)
	       (coding-system utf-8)
	       (coding-priority utf-8)
	       (input-method . "malayalam-itrans")
	       (documentation . "\
South Indian language Malayalam is supported in this language environment."))
 '("Indian"))
128
(defun indian-compose-regexp (regexp table)
  "Return REGEXP with the mnemonics listed in TABLE expanded.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
The entries are applied one after another, in TABLE order, each
to the result of the previous one.  MNEMONIC-STRING is matched
as a case-sensitive regexp, and REPLACEMENT-STRING is inserted
literally, with its case left untouched."
  (let ((case-fold-search nil)
        (expanded regexp))
    (dolist (entry table expanded)
      (let ((mnemonic (car entry))
            (replacement (cdr entry)))
        (setq expanded
              (replace-regexp-in-string mnemonic replacement expanded
                                        t t))))))
137
(defconst devanagari-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "[\u0900-\u0902]")	; vowel modifier (above)
	   ("A" . "\u0903")		; vowel modifier (post)
	   ("V" . "[\u0904-\u0914\u0960\u0961\u0972]") ; independent vowel
	   ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
	   ("R" . "\u0930")		; RA
	   ("n" . "\u093C")		; NUKTA
	   ("v" . "[\u093E-\u094C\u094E\u0955\u0962\u0963]") ; vowel sign
	   ("H" . "\u094D")		; HALANT
	   ("s" . "[\u0951\u0952]")	; stress sign
	   ("t" . "[\u0953\u0954]")	; accent
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0900-\u097F]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Devanagari characters.")
165
(defconst bengali-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "\u0981")		; SIGN CANDRABINDU
	   ("A" . "[\u0982\u0983]")	; SIGN ANUSVARA .. VISARGA
	   ("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel
	   ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
	   ("B" . "[\u09AC\u09AF\u09B0\u09F0]")		; BA, YA, RA, RA WITH MIDDLE DIAGONAL
	   ("R" . "[\u09B0\u09F0]")		; RA, RA WITH MIDDLE DIAGONAL
	   ("n" . "\u09BC")		; NUKTA
	   ("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign
	   ("H" . "\u09CD")		; HALANT
	   ("T" . "\u09CE")		; KHANDA TA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0980-\u09FF]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
      ;; KHANDA TA, optionally preceded by RA + HALANT, or
      "\\(?:RH\\)?T\\|"
      ;; special consonant form, or
      "JHB\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Bengali characters.")
195
(defconst gurmukhi-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "[\u0A01\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
	   ("A" . "\u0A03")		; SIGN VISARGA
	   ("V" . "[\u0A05-\u0A14]")	; independent vowel
	   ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]")	; consonant
	   ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
	   ("n" . "\u0A3C")		; NUKTA
	   ("v" . "[\u0A3E-\u0A4C]")	; vowel sign
	   ("H" . "\u0A4D")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0A00-\u0A7F]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gurmukhi characters.")
221
(defconst gujarati-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "[\u0A81\u0A82]")	; SIGN CANDRABINDU .. ANUSVARA
	   ("A" . "\u0A83")		; SIGN VISARGA
	   ("V" . "[\u0A85-\u0A94\u0AE0\u0AE1]") ; independent vowel
	   ("C" . "[\u0A95-\u0AB9]")	; consonant
	   ("R" . "\u0AB0")		; RA
	   ("n" . "\u0ABC")		; NUKTA
	   ("v" . "[\u0ABE-\u0ACC\u0AE2\u0AE3]") ; vowel sign
	   ("H" . "\u0ACD")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0A80-\u0AFF]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gujarati characters.")
247
(defconst oriya-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "\u0B01")		; SIGN CANDRABINDU
	   ("A" . "[\u0B02\u0B03]")	; SIGN ANUSVARA .. VISARGA
	   ("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel
	   ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]")	; consonant
	   ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form
	   ("R" . "\u0B30")		; RA
	   ("n" . "\u0B3C")		; NUKTA
	   ("v" . "[\u0B3E-\u0B4C\u0B56\u0B57\u0B62\u0B63]") ; vowel sign
	   ("H" . "\u0B4D")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0B00-\u0B7F]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHB\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Oriya characters.")
274
(defconst tamil-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "\u0B82")		; SIGN ANUSVARA
	   ("V" . "[\u0B85-\u0B94]")	; independent vowel
	   ("C" . "[\u0B95-\u0BB9]")	; consonant
	   ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
	   ("H" . "\u0BCD")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0B80-\u0BFF]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vv*a?\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Tamil characters.")
295
(defconst telugu-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("a" . "[\u0C01-\u0C03]")	; SIGN CANDRABINDU .. VISARGA
	   ("V" . "[\u0C05-\u0C14\u0C60\u0C61]") ; independent vowel
	   ("C" . "[\u0C15-\u0C39\u0C58\u0C59]") ; consonant
	   ("v" . "[\u0C3E-\u0C4C\u0C55\u0C56\u0C62\u0C63]")	; vowel sign
	   ("H" . "\u0C4D")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0C00-\u0C7F]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "V\\(?:J?HC\\)?v*a?\\|"
      ;; special consonant form, or
      "JHC\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Telugu characters.")
318
(defconst kannada-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("A" . "[\u0C82\u0C83]")	; SIGN ANUSVARA .. VISARGA
	   ("V" . "[\u0C85-\u0C94\u0CE0\u0CE1]") ; independent vowel
	   ("C" . "[\u0C95-\u0CB9\u0CDE]")	 ; consonant
	   ("R" . "\u0CB0")		; RA
	   ("n" . "\u0CBC")		; NUKTA
	   ("v" . "[\u0CBE-\u0CCC\u0CD5\u0CD6\u0CE2\u0CE3]") ; vowel sign
	   ("H" . "\u0CCD")		; VIRAMA
	   ("N" . "\u200C")		; ZWNJ
	   ("J" . "\u200D")		; ZWJ
	   ("X" . "[\u0C80-\u0CFF]"))))	; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
      ;; special consonant form, or
      "JHC\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Kannada characters.")
343
(defconst malayalam-composable-pattern
  ;; TABLE maps each one-letter mnemonic used in the template below
  ;; to the character or character class it stands for;
  ;; `indian-compose-regexp' performs the substitution.
  (let ((table
	 '(("A" . "[\u0D02\u0D03]")	; SIGN ANUSVARA .. VISARGA
	   ("V" . "[\u0D05-\u0D14\u0D60\u0D61]")  ; independent vowel
	   ("C" . "[\u0D15-\u0D39]")		  ; consonant
	   ("Y" . "[\u0D2F\u0D30\u0D32\u0D35]")   ; YA, RA, LA, VA
	   ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62\u0D63]")	; postbase matra
	   ("H" . "\u0D4D")			  ; SIGN VIRAMA
	   ("N" . "\u200C")			  ; ZWNJ
	   ("J" . "\u200D")			  ; ZWJ
	   ("X" . "[\u0D00-\u0D7F]"))))		  ; all coverage
    ;; The template is a list of alternatives joined by "\\|".
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
      ;; syllables with an independent vowel, or
      ;; (The vowel-sign repetition must be the greedy "v*": a
      ;; non-greedy "v*?" followed only by the optional "A?" would
      ;; never consume a vowel sign, because the rest of the
      ;; alternative can always match the empty string.)
      "V\\(?:J?HY\\)?v*A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Malayalam characters.")
367
;; Install a composition rule for every character range that
;; `char-script-table' assigns to one of the scripts handled above.
;; Each rule pairs the script's composable pattern with
;; `font-shape-gstring'.
(let ((patterns-by-script
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali bengali-composable-pattern)
             (cons 'gurmukhi gurmukhi-composable-pattern)
             (cons 'gujarati gujarati-composable-pattern)
             (cons 'oriya oriya-composable-pattern)
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu telugu-composable-pattern)
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (chars script)
     ;; CHARS is the key handed over by `map-char-table'; SCRIPT is
     ;; the value stored for it in `char-script-table'.
     (let ((entry (assq script patterns-by-script)))
       (when entry
         (set-char-table-range
          composition-function-table chars
          (list (vector (cdr entry) 0 #'font-shape-gstring))))))
   char-script-table))
386
387(provide 'indian)
388
389;;; indian.el ends here
390