1;;; po.el --- basic support of PO translation files
2
3;; Copyright (C) 1995-1998, 2000-2021 Free Software Foundation, Inc.
4
5;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6;;          Greg McGary <gkm@magilla.cichlid.com>,
7;;          Bruno Haible <bruno@clisp.org>.
8;; Keywords: i18n, files
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software: you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
24
25;;; Commentary:
26
27;; This package makes sure visiting PO files decodes them correctly,
28;; according to the Charset= header in the PO file.  For more support
29;; for editing PO files, see po-mode.el.
30
31;;; Code:
32
33(defconst po-content-type-charset-alist
34  '(("ASCII" . undecided)
35    ("ANSI_X3.4-1968" . undecided)
36    ("US-ASCII" . undecided))
37  "Alist of coding system versus GNU libc/libiconv canonical charset name.
38Contains canonical charset names that don't correspond to coding systems.")
39
40(defun po-find-charset (filename)
41  "Return PO charset value for FILENAME.
42If FILENAME is a cons cell, its CDR is a buffer that already contains
43the PO file (but not yet decoded)."
44  (let ((charset-regexp
45	 "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
46	(buf (and (consp filename) (cdr filename)))
47	(short-read nil))
48    (when buf
49      (set-buffer buf)
50      (goto-char (point-min)))
51    ;; Try the first 4096 bytes.  In case we cannot find the charset value
52    ;; within the first 4096 bytes (the PO file might start with a long
53    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
54    ;; we've checked the empty header entry entirely.
55    (while (not (or short-read (re-search-forward "^msgid" nil t) buf))
56      (save-excursion
57        (goto-char (point-max))
58	(let ((pair (insert-file-contents-literally filename nil
59						    (1- (point))
60						    (1- (+ (point) 4096)))))
61	  (setq short-read (< (nth 1 pair) 4096)))))
62    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
63	  ((or short-read buf) nil)
64	  ;; We've found the first msgid; maybe, only a part of the msgstr
65	  ;; value was loaded.  Load the next 1024 bytes; if charset still
66	  ;; isn't available, give up.
67	  (t (save-excursion
68	       (goto-char (point-max))
69	       (insert-file-contents-literally filename nil
70					       (1- (point))
71					       (1- (+ (point) 1024))))
72	     (if (re-search-forward charset-regexp nil t)
73		 (match-string 1))))))
74
75(defun po-find-file-coding-system-guts (operation filename)
76  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
77Do so according to FILENAME's declared charset.
78FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
79in BUFFER."
80  (and
81   (eq operation 'insert-file-contents)
82   (or (if (consp filename) (buffer-live-p (cdr filename)))
83       (file-exists-p filename))
84   (with-temp-buffer
85     (let* ((coding-system-for-read 'no-conversion)
86	    (charset (or (po-find-charset filename) "ascii"))
87	    assoc)
88       (list (cond
89	      ((setq assoc
90		     (assoc-string charset
91                                   po-content-type-charset-alist
92                                   t))
93	       (cdr assoc))
94	      ((or (setq assoc (assoc-string charset coding-system-alist t))
95		   (setq assoc
96			 (assoc-string (subst-char-in-string ?_ ?-
97                                                             charset)
98                                       coding-system-alist t)))
99	       (intern (car assoc)))
100	      ;; In principle we should also check the `mime-charset'
101	      ;; property of everything in the base coding system
102	      ;; list, but there should always be a coding system
103	      ;; corresponding to the MIME name.
104	      ((featurep 'code-pages)
105	       ;; Give up.
106	       'raw-text)
107	      (t
108	       ;; Try again with code-pages loaded.  Maybe it's best
109	       ;; to require it initially?
110	       (require 'code-pages nil t)
111	       (if (or
112		    (setq assoc (assoc-string charset coding-system-alist t))
113		    (setq assoc (assoc-string (subst-char-in-string
114                                               ?_ ?- charset)
115                                              coding-system-alist t)))
116		   (intern (car assoc))
117		 'raw-text))))))))
118
119;;;###autoload
120(defun po-find-file-coding-system (arg-list)
121  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
122Called through `file-coding-system-alist', before the file is visited for real."
123  (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
124
125(provide 'po)
126
127;;; po.el ends here
128