;;; indian.el --- Indian languages support -*- coding: utf-8; lexical-binding: t; -*-

;; Copyright (C) 1997, 1999, 2001-2021 Free Software Foundation, Inc.
;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021

;; Keywords: multilingual, i18n, Indian

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; This file contains definitions of Indian language environments, and
;; setups for displaying the scripts used there.
;;
;; It is organized in three parts:
;;   1. the IS 13194 coding system and the language environments,
;;   2. one `SCRIPT-composable-pattern' regexp per script, each written
;;      with one-letter mnemonics that `indian-compose-regexp' expands,
;;   3. registration of those regexps in `composition-function-table'.

;;; Code:

(define-coding-system 'in-is13194-devanagari
  "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)."
  :coding-type 'iso-2022
  :mnemonic ?D
  :designation [ascii indian-is13194 nil nil]
  :charset-list '(ascii indian-is13194)
  ;; The two conversion functions are not defined in this file.
  :post-read-conversion 'in-is13194-post-read-conversion
  :pre-write-conversion 'in-is13194-pre-write-conversion)

(define-coding-system-alias 'devanagari 'in-is13194-devanagari)

;;; Language environments.
;; Every environment below uses the `unicode' charset and `utf-8'
;; coding system, and is filed under the "Indian" parent menu.

(set-language-info-alist
 "Devanagari" '((charset unicode)
                (coding-system utf-8)
                (coding-priority utf-8)
                (input-method . "devanagari-aiba")
                (documentation . "\
Such languages using Devanagari script as Hindi and Marathi
are supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Bengali" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "bengali-itrans")
             (documentation . "\
Such languages using Bengali script as Bengali and Assamese
are supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Punjabi" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "punjabi-itrans")
             (documentation . "\
North Indian language Punjabi is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Gujarati" '((charset unicode)
              (coding-system utf-8)
              (coding-priority utf-8)
              (input-method . "gujarati-itrans")
              (documentation . "\
North Indian language Gujarati is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Oriya" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "oriya-itrans")
           (documentation . "\
Such languages using Oriya script as Oriya, Khonti, and Santali
are supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Tamil" '((charset unicode)
           (coding-system utf-8)
           (coding-priority utf-8)
           (input-method . "tamil-itrans")
           (documentation . "\
South Indian Language Tamil is supported in this language environment."))
 '("Indian"))

(set-language-info-alist
 "Telugu" '((charset unicode)
            (coding-system utf-8)
            (coding-priority utf-8)
            (input-method . "telugu-itrans")
            (documentation . "\
South Indian Language Telugu is supported in this language environment."))
 '("Indian"))

;; Kannada formerly named the coding system `mule-utf-8'; it now uses
;; `utf-8' like every other environment in this file.
(set-language-info-alist
 "Kannada" '((charset unicode)
             (coding-system utf-8)
             (coding-priority utf-8)
             (input-method . "kannada-itrans")
             (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ")
             (documentation . "\
Kannada language and script is supported in this language
environment."))
 '("Indian"))

(set-language-info-alist
 "Malayalam" '((charset unicode)
               (coding-system utf-8)
               (coding-priority utf-8)
               (input-method . "malayalam-itrans")
               (documentation . "\
South Indian language Malayalam is supported in this language environment."))
 '("Indian"))

;;; Composable patterns.

(defun indian-compose-regexp (regexp table)
  "Return REGEXP with its mnemonic strings expanded according to TABLE.
TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).

The entries of TABLE are applied one after another, in order, each
to the result of the previous one.  Every occurrence of a
MNEMONIC-STRING is matched literally and case-sensitively, and the
REPLACEMENT-STRING is inserted verbatim.  Because later entries see
the output of earlier ones, a REPLACEMENT-STRING must not contain a
mnemonic that appears later in TABLE."
  (let ((case-fold-search nil))         ; "a" and "A" are distinct mnemonics
    (dolist (elt table)
      (setq regexp (replace-regexp-in-string
                    ;; Quote the mnemonic so it can never be taken
                    ;; for a regexp operator.
                    (regexp-quote (car elt)) (cdr elt) regexp t t)))
    regexp))

(defconst devanagari-composable-pattern
  (let ((table
         '(("a" . "[\u0900-\u0902]")    ; vowel modifier (above)
           ("A" . "\u0903")             ; vowel modifier (post)
           ("V" . "[\u0904-\u0914\u0960\u0961\u0972]") ; independent vowel
           ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant
           ("R" . "\u0930")             ; RA
           ("n" . "\u093C")             ; NUKTA
           ("v" . "[\u093E-\u094C\u094E\u0955\u0962\u0963]") ; vowel sign
           ("H" . "\u094D")             ; HALANT
           ("s" . "[\u0951\u0952]")     ; stress sign
           ("t" . "[\u0953\u0954]")     ; accent
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0900-\u097F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Devanagari characters.")

(defconst bengali-composable-pattern
  (let ((table
         '(("a" . "\u0981")             ; SIGN CANDRABINDU
           ("A" . "[\u0982\u0983]")     ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel
           ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant
           ("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA
           ("R" . "[\u09B0\u09F0]")     ; RA
           ("n" . "\u09BC")             ; NUKTA
           ("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign
           ("H" . "\u09CD")             ; HALANT
           ("T" . "\u09CE")             ; KHANDA TA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0980-\u09FF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|"
      ;; another syllables with an independent vowel, or
      "\\(?:RH\\)?T\\|"
      ;; special consonant form, or
      "JHB\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Bengali characters.")

(defconst gurmukhi-composable-pattern
  (let ((table
         '(("a" . "[\u0A01\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI
           ("A" . "\u0A03")             ; SIGN VISARGA
           ("V" . "[\u0A05-\u0A14]")    ; independent vowel
           ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant
           ("Y" . "[\u0A2F\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA
           ("n" . "\u0A3C")             ; NUKTA
           ("v" . "[\u0A3E-\u0A4C]")    ; vowel sign
           ("H" . "\u0A4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A00-\u0A7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vn?\\(?:J?HY\\)?v*n?a?A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gurmukhi characters.")

(defconst gujarati-composable-pattern
  (let ((table
         '(("a" . "[\u0A81\u0A82]")     ; SIGN CANDRABINDU .. ANUSVARA
           ("A" . "\u0A83")             ; SIGN VISARGA
           ("V" . "[\u0A85-\u0A94\u0AE0\u0AE1]") ; independent vowel
           ("C" . "[\u0A95-\u0AB9]")    ; consonant
           ("R" . "\u0AB0")             ; RA
           ("n" . "\u0ABC")             ; NUKTA
           ("v" . "[\u0ABE-\u0ACC\u0AE2\u0AE3]") ; vowel sign
           ("H" . "\u0ACD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0A80-\u0AFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHR\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Gujarati characters.")

(defconst oriya-composable-pattern
  (let ((table
         '(("a" . "\u0B01")             ; SIGN CANDRABINDU
           ("A" . "[\u0B02\u0B03]")     ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel
           ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant
           ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form
           ("R" . "\u0B30")             ; RA
           ("n" . "\u0B3C")             ; NUKTA
           ("v" . "[\u0B3E-\u0B4C\u0B56\u0B57\u0B62\u0B63]") ; vowel sign
           ("H" . "\u0B4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B00-\u0B7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|"
      ;; special consonant form, or
      "JHB\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Oriya characters.")

(defconst tamil-composable-pattern
  (let ((table
         '(("a" . "\u0B82")             ; SIGN ANUSVARA
           ("V" . "[\u0B85-\u0B94]")    ; independent vowel
           ("C" . "[\u0B95-\u0BB9]")    ; consonant
           ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
           ("H" . "\u0BCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0B80-\u0BFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "Vv*a?\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Tamil characters.")

(defconst telugu-composable-pattern
  (let ((table
         '(("a" . "[\u0C01-\u0C03]")    ; SIGN CANDRABINDU .. VISARGA
           ("V" . "[\u0C05-\u0C14\u0C60\u0C61]") ; independent vowel
           ("C" . "[\u0C15-\u0C39\u0C58\u0C59]") ; consonant
           ("v" . "[\u0C3E-\u0C4C\u0C55\u0C56\u0C62\u0C63]") ; vowel sign
           ("H" . "\u0C4D")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C00-\u0C7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
      ;; syllables with an independent vowel, or
      "V\\(?:J?HC\\)?v*a?\\|"
      ;; special consonant form, or
      "JHC\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Telugu characters.")

(defconst kannada-composable-pattern
  (let ((table
         '(("A" . "[\u0C82\u0C83]")     ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0C85-\u0C94\u0CE0\u0CE1]") ; independent vowel
           ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
           ("R" . "\u0CB0")             ; RA
           ("n" . "\u0CBC")             ; NUKTA
           ("v" . "[\u0CBE-\u0CCC\u0CD5\u0CD6\u0CE2\u0CE3]") ; vowel sign
           ("H" . "\u0CCD")             ; VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0C80-\u0CFF]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; syllables with an independent vowel, or
      "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
      ;; consonant-based syllables, or
      "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
      ;; special consonant form, or
      "JHC\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Kannada characters.")

(defconst malayalam-composable-pattern
  (let ((table
         '(("A" . "[\u0D02\u0D03]")     ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0D05-\u0D14\u0D60\u0D61]") ; independent vowel
           ("C" . "[\u0D15-\u0D39]")    ; consonant
           ("Y" . "[\u0D2F\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
           ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62\u0D63]") ; postbase matra
           ("H" . "\u0D4D")             ; SIGN VIRAMA
           ("N" . "\u200C")             ; ZWNJ
           ("J" . "\u200D")             ; ZWJ
           ("X" . "[\u0D00-\u0D7F]")))) ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
      ;; syllables with an independent vowel, or
      ;; NOTE(review): "v*?" is a non-greedy repetition, unlike the
      ;; "v*" used for the other scripts -- confirm this is intended.
      "V\\(?:J?HY\\)?v*?A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Malayalam characters.")

;;; Register the patterns for automatic composition.
;; For every range of `char-script-table' whose script has a pattern
;; above, install a one-rule composition entry: the script's regexp,
;; 0 as the rule's second element, and `font-shape-gstring' as the
;; composing function.
(let ((script-regexp-alist
       `((devanagari . ,devanagari-composable-pattern)
         (bengali . ,bengali-composable-pattern)
         (gurmukhi . ,gurmukhi-composable-pattern)
         (gujarati . ,gujarati-composable-pattern)
         (oriya . ,oriya-composable-pattern)
         (tamil . ,tamil-composable-pattern)
         (telugu . ,telugu-composable-pattern)
         (kannada . ,kannada-composable-pattern)
         (malayalam . ,malayalam-composable-pattern))))
  (map-char-table
   (lambda (key val)
     ;; KEY is a character or character range, VAL its script symbol.
     (let ((slot (assq val script-regexp-alist)))
       (when slot
         (set-char-table-range
          composition-function-table key
          (list (vector (cdr slot) 0 #'font-shape-gstring))))))
   char-script-table))

(provide 'indian)

;;; indian.el ends here