1;;; rfc2231.el --- Functions for decoding rfc2231 headers -*- lexical-binding:t -*- 2 3;; Copyright (C) 1998-2021 Free Software Foundation, Inc. 4 5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 6;; This file is part of GNU Emacs. 7 8;; GNU Emacs is free software: you can redistribute it and/or modify 9;; it under the terms of the GNU General Public License as published by 10;; the Free Software Foundation, either version 3 of the License, or 11;; (at your option) any later version. 12 13;; GNU Emacs is distributed in the hope that it will be useful, 14;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16;; GNU General Public License for more details. 17 18;; You should have received a copy of the GNU General Public License 19;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 20 21;;; Commentary: 22 23;;; Code: 24 25(eval-when-compile (require 'cl-lib)) 26(require 'ietf-drums) 27(require 'rfc2047) 28(autoload 'mm-encode-body "mm-bodies") 29(autoload 'mail-header-remove-whitespace "mail-parse") 30(autoload 'mail-header-remove-comments "mail-parse") 31 32(defun rfc2231-get-value (ct attribute) 33 "Return the value of ATTRIBUTE from CT." 34 (cdr (assq attribute (cdr ct)))) 35 36(defun rfc2231-parse-qp-string (string) 37 "Parse QP-encoded string using `rfc2231-parse-string'. 38N.B. This is in violation with RFC2047, but it seem to be in common use." 39 (rfc2231-parse-string (rfc2047-decode-string string))) 40 41(defun rfc2231-parse-string (string &optional signal-error) 42 "Parse STRING and return a list. 43The list will be on the form 44 `(name (attribute . value) (attribute . value)...)'. 45 46If the optional SIGNAL-ERROR is non-nil, signal an error when this 47function fails in parsing of parameters. Otherwise, this function 48must never cause a Lisp error." 49 (with-temp-buffer 50 (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token)) 51 (stoken (ietf-drums-token-to-list ietf-drums-tspecials)) 52 (ntoken (ietf-drums-token-to-list "0-9")) 53 c type attribute encoded number parameters value) 54 (ietf-drums-init 55 (condition-case nil 56 (mail-header-remove-whitespace 57 (mail-header-remove-comments string)) 58 ;; The most likely cause of an error is unbalanced parentheses 59 ;; or double-quotes. If all parentheses and double-quotes are 60 ;; quoted meaninglessly with backslashes, removing them might 61 ;; make it parsable. Let's try... 62 (error 63 (let (mod) 64 (when (and (string-match "\\\\\"" string) 65 (not (string-match "\\`\"\\|[^\\]\"" string))) 66 (setq string (replace-regexp-in-string "\\\\\"" "\"" string) 67 mod t)) 68 (when (and (string-match "\\\\(" string) 69 (string-match "\\\\)" string) 70 (not (string-match "\\`(\\|[^\\][()]" string))) 71 (setq string (replace-regexp-in-string 72 "\\\\\\([()]\\)" "\\1" string) 73 mod t)) 74 (or (and mod 75 (ignore-errors 76 (mail-header-remove-whitespace 77 (mail-header-remove-comments string)))) 78 ;; Finally, attempt to extract only type. 79 (if (string-match 80 (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+" 81 "\\(?:/[^" ietf-drums-tspecials 82 "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)") 83 string) 84 (match-string 1 string) 85 "")))))) 86 (let ((table (copy-syntax-table ietf-drums-syntax-table))) 87 (modify-syntax-entry ?\' "w" table) 88 (modify-syntax-entry ?* " " table) 89 (modify-syntax-entry ?\; " " table) 90 (modify-syntax-entry ?= " " table) 91 ;; The following isn't valid, but one should be liberal 92 ;; in what one receives. 93 (modify-syntax-entry ?\: "w" table) 94 (set-syntax-table table)) 95 (setq c (char-after)) 96 (when (and (memq c ttoken) 97 (not (memq c stoken)) 98 (setq type (ignore-errors 99 (downcase 100 (buffer-substring (point) (progn 101 (forward-sexp 1) 102 (point))))))) 103 ;; Do the params 104 (condition-case err 105 (progn 106 (while (not (eobp)) 107 (setq c (char-after)) 108 (unless (eq c ?\;) 109 (error "Invalid header: %s" string)) 110 (forward-char 1) 111 ;; If c in nil, then this is an invalid header, but 112 ;; since elm generates invalid headers on this form, 113 ;; we allow it. 114 (when (setq c (char-after)) 115 (if (and (memq c ttoken) 116 (not (memq c stoken))) 117 (setq attribute 118 (intern 119 (downcase 120 (buffer-substring 121 (point) (progn (forward-sexp 1) (point)))))) 122 (error "Invalid header: %s" string)) 123 (setq c (char-after)) 124 (if (eq c ?*) 125 (progn 126 (forward-char 1) 127 (setq c (char-after)) 128 (if (not (memq c ntoken)) 129 (setq encoded t 130 number nil) 131 (setq number 132 (string-to-number 133 (buffer-substring 134 (point) (progn (forward-sexp 1) (point))))) 135 (setq c (char-after)) 136 (when (eq c ?*) 137 (setq encoded t) 138 (forward-char 1) 139 (setq c (char-after))))) 140 (setq number nil 141 encoded nil)) 142 (unless (eq c ?=) 143 (error "Invalid header: %s" string)) 144 (forward-char 1) 145 (setq c (char-after)) 146 (cond 147 ((eq c ?\") 148 (setq value (buffer-substring (1+ (point)) 149 (progn 150 (forward-sexp 1) 151 (1- (point))))) 152 (when encoded 153 (setq value (mapconcat (lambda (c) (format "%%%02x" c)) 154 value "")))) 155 ((and (or (memq c ttoken) 156 ;; EXTENSION: Support non-ascii chars. 157 (> c ?\177)) 158 (not (memq c stoken))) 159 (setq value 160 (buffer-substring 161 (point) 162 (progn 163 ;; Jump over asterisk, non-ASCII 164 ;; and non-boundary characters. 165 (while (and c 166 (or (eq c ?*) 167 (> c ?\177) 168 (not (eq (char-syntax c) ? )))) 169 (forward-char 1) 170 (setq c (char-after))) 171 (point))))) 172 (t 173 (error "Invalid header: %s" string))) 174 (push (list attribute value number encoded) 175 parameters)))) 176 (error 177 (setq parameters nil) 178 (when signal-error 179 (signal (car err) (cdr err))))) 180 181 ;; Now collect and concatenate continuation parameters. 182 (let ((cparams nil) 183 elem) 184 (cl-loop for (attribute value part encoded) 185 in (sort parameters (lambda (e1 e2) 186 (< (or (caddr e1) 0) 187 (or (caddr e2) 0)))) 188 do (cond 189 ;; First part. 190 ((or (not (setq elem (assq attribute cparams))) 191 (and (numberp part) 192 (zerop part))) 193 (push (list attribute value encoded) cparams)) 194 ;; Repetition of a part; do nothing. 195 ((and elem 196 (null number)) 197 ) 198 ;; Concatenate continuation parts. 199 (t 200 (setcar (cdr elem) (concat (cadr elem) value))))) 201 ;; Finally decode encoded values. 202 (cons type (mapcar 203 (lambda (elem) 204 (cons (car elem) 205 (if (nth 2 elem) 206 (rfc2231-decode-encoded-string (nth 1 elem)) 207 (nth 1 elem)))) 208 (nreverse cparams)))))))) 209 210(defun rfc2231-decode-encoded-string (string) 211 "Decode an RFC2231-encoded string. 212These look like: 213 \"us-ascii\\='en-us\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\", 214 \"us-ascii\\='\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\", 215 \"\\='en-us\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\", 216 \"\\='\\='This%20is%20%2A%2A%2Afun%2A%2A%2A\", or 217 \"This is ***fun***\"." 218 (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string) 219 (let ((coding-system (mm-charset-to-coding-system 220 (match-string 1 string) nil t)) 221 ;;(language (match-string 2 string)) 222 (value (match-string 3 string))) 223 (mm-with-unibyte-buffer 224 (insert value) 225 (goto-char (point-min)) 226 (while (re-search-forward "%\\([[:xdigit:]][[:xdigit:]]\\)" nil t) 227 (insert 228 (prog1 229 (string-to-number (match-string 1) 16) 230 (delete-region (match-beginning 0) (match-end 0))))) 231 ;; Decode using the charset, if any. 232 (if (memq coding-system '(nil ascii)) 233 (buffer-string) 234 (decode-coding-string (buffer-string) coding-system))))) 235 236(defun rfc2231-encode-string (param value) 237 "Return a PARAM=VALUE string encoded according to RFC2231. 238Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert 239the result of this function." 240 (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token)) 241 (tspecial (ietf-drums-token-to-list ietf-drums-tspecials)) 242 (special (ietf-drums-token-to-list "*'%\n\t")) 243 (ascii (ietf-drums-token-to-list ietf-drums-text-token)) 244 (num -1) 245 ;; Don't make lines exceeding 76 column. 246 (limit (- 74 (length param))) 247 spacep encodep charsetp charset broken) 248 (mm-with-multibyte-buffer 249 (insert value) 250 (goto-char (point-min)) 251 (while (not (eobp)) 252 (cond 253 ((or (memq (following-char) control) 254 (memq (following-char) tspecial) 255 (memq (following-char) special)) 256 (setq encodep t)) 257 ((eq (following-char) ? ) 258 (setq spacep t)) 259 ((not (memq (following-char) ascii)) 260 (setq charsetp t))) 261 (forward-char 1)) 262 (when charsetp 263 (setq charset (mm-encode-body))) 264 (mm-disable-multibyte) 265 (cond 266 ((or encodep charsetp 267 (progn 268 (end-of-line) 269 (> (current-column) (if spacep (- limit 2) limit)))) 270 (setq limit (- limit 6)) 271 (goto-char (point-min)) 272 (insert (symbol-name (or charset 'us-ascii)) "''") 273 (while (not (eobp)) 274 (if (or (not (memq (following-char) ascii)) 275 (memq (following-char) control) 276 (memq (following-char) tspecial) 277 (memq (following-char) special) 278 (eq (following-char) ? )) 279 (progn 280 (when (>= (current-column) (1- limit)) 281 (insert ";\n") 282 (setq broken t)) 283 (insert "%" (format "%02x" (following-char))) 284 (delete-char 1)) 285 (when (> (current-column) limit) 286 (insert ";\n") 287 (setq broken t)) 288 (forward-char 1))) 289 (goto-char (point-min)) 290 (if (not broken) 291 (insert param "*=") 292 (while (not (eobp)) 293 (insert (if (>= num 0) " " "") 294 param "*" (format "%d" (cl-incf num)) "*=") 295 (forward-line 1)))) 296 (spacep 297 (goto-char (point-min)) 298 (insert param "=\"") 299 (goto-char (point-max)) 300 (insert "\"")) 301 (t 302 (goto-char (point-min)) 303 (insert param "="))) 304 (buffer-string)))) 305 306(provide 'rfc2231) 307 308;;; rfc2231.el ends here 309