1;;; po.el --- basic support of PO translation files 2 3;; Copyright (C) 1995-1998, 2000-2021 Free Software Foundation, Inc. 4 5;; Authors: François Pinard <pinard@iro.umontreal.ca>, 6;; Greg McGary <gkm@magilla.cichlid.com>, 7;; Bruno Haible <bruno@clisp.org>. 8;; Keywords: i18n, files 9 10;; This file is part of GNU Emacs. 11 12;; GNU Emacs is free software: you can redistribute it and/or modify 13;; it under the terms of the GNU General Public License as published by 14;; the Free Software Foundation, either version 3 of the License, or 15;; (at your option) any later version. 16 17;; GNU Emacs is distributed in the hope that it will be useful, 18;; but WITHOUT ANY WARRANTY; without even the implied warranty of 19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20;; GNU General Public License for more details. 21 22;; You should have received a copy of the GNU General Public License 23;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 24 25;;; Commentary: 26 27;; This package makes sure visiting PO files decodes them correctly, 28;; according to the Charset= header in the PO file. For more support 29;; for editing PO files, see po-mode.el. 30 31;;; Code: 32 33(defconst po-content-type-charset-alist 34 '(("ASCII" . undecided) 35 ("ANSI_X3.4-1968" . undecided) 36 ("US-ASCII" . undecided)) 37 "Alist of coding system versus GNU libc/libiconv canonical charset name. 38Contains canonical charset names that don't correspond to coding systems.") 39 40(defun po-find-charset (filename) 41 "Return PO charset value for FILENAME. 42If FILENAME is a cons cell, its CDR is a buffer that already contains 43the PO file (but not yet decoded)." 44 (let ((charset-regexp 45 "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"") 46 (buf (and (consp filename) (cdr filename))) 47 (short-read nil)) 48 (when buf 49 (set-buffer buf) 50 (goto-char (point-min))) 51 ;; Try the first 4096 bytes. In case we cannot find the charset value 52 ;; within the first 4096 bytes (the PO file might start with a long 53 ;; comment) try the next 4096 bytes repeatedly until we'll know for sure 54 ;; we've checked the empty header entry entirely. 55 (while (not (or short-read (re-search-forward "^msgid" nil t) buf)) 56 (save-excursion 57 (goto-char (point-max)) 58 (let ((pair (insert-file-contents-literally filename nil 59 (1- (point)) 60 (1- (+ (point) 4096))))) 61 (setq short-read (< (nth 1 pair) 4096))))) 62 (cond ((re-search-forward charset-regexp nil t) (match-string 1)) 63 ((or short-read buf) nil) 64 ;; We've found the first msgid; maybe, only a part of the msgstr 65 ;; value was loaded. Load the next 1024 bytes; if charset still 66 ;; isn't available, give up. 67 (t (save-excursion 68 (goto-char (point-max)) 69 (insert-file-contents-literally filename nil 70 (1- (point)) 71 (1- (+ (point) 1024)))) 72 (if (re-search-forward charset-regexp nil t) 73 (match-string 1)))))) 74 75(defun po-find-file-coding-system-guts (operation filename) 76 "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME. 77Do so according to FILENAME's declared charset. 78FILENAME may be a cons (NAME . BUFFER). In that case, detect charset 79in BUFFER." 80 (and 81 (eq operation 'insert-file-contents) 82 (or (if (consp filename) (buffer-live-p (cdr filename))) 83 (file-exists-p filename)) 84 (with-temp-buffer 85 (let* ((coding-system-for-read 'no-conversion) 86 (charset (or (po-find-charset filename) "ascii")) 87 assoc) 88 (list (cond 89 ((setq assoc 90 (assoc-string charset 91 po-content-type-charset-alist 92 t)) 93 (cdr assoc)) 94 ((or (setq assoc (assoc-string charset coding-system-alist t)) 95 (setq assoc 96 (assoc-string (subst-char-in-string ?_ ?- 97 charset) 98 coding-system-alist t))) 99 (intern (car assoc))) 100 ;; In principle we should also check the `mime-charset' 101 ;; property of everything in the base coding system 102 ;; list, but there should always be a coding system 103 ;; corresponding to the MIME name. 104 ((featurep 'code-pages) 105 ;; Give up. 106 'raw-text) 107 (t 108 ;; Try again with code-pages loaded. Maybe it's best 109 ;; to require it initially? 110 (require 'code-pages nil t) 111 (if (or 112 (setq assoc (assoc-string charset coding-system-alist t)) 113 (setq assoc (assoc-string (subst-char-in-string 114 ?_ ?- charset) 115 coding-system-alist t))) 116 (intern (car assoc)) 117 'raw-text)))))))) 118 119;;;###autoload 120(defun po-find-file-coding-system (arg-list) 121 "Return a (DECODING . ENCODING) pair, according to PO file's charset. 122Called through `file-coding-system-alist', before the file is visited for real." 123 (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))) 124 125(provide 'po) 126 127;;; po.el ends here 128