1;;;; catalogs.lisp -- XML Catalogs  -*- Mode: Lisp; readtable: runes -*-
2;;;;
3;;;; This file is part of the CXML parser, released under Lisp-LGPL.
4;;;; See file COPYING for details.
5;;;;
6;;;; Developed 2004 for headcraft - http://headcraft.de/
7;;;; Copyright: David Lichteblau
8
9(cl:in-package #:cxml)
10
11;;; http://www.oasis-open.org/committees/entity/spec.html
12;;;
13;;; Bugs:
14;;;   - We validate using the Catalog DTD while parsing, which is too strict
15;;;     and will will fail to parse files using other parser's extensions.
16;;;     (Jedenfalls behauptet das die Spec.)
17;;;     A long-term solution might be an XML Schema validator.
18
19(defvar *prefer* :public)
20(defvar *default-catalog*
21    '(;; libxml standard
22      "/etc/xml/catalog"
23      ;; FreeBSD
24      "/usr/local/share/xml/catalog.ports"))
25
26(defstruct (catalog (:constructor %make-catalog ()))
27  main-files
28  (dtd-cache (make-dtd-cache))
29  (file-table (puri:make-uri-space)))
30
31(defstruct (entry-file (:conc-name ""))
32  (system-entries)                      ;extid 2
33  (rewrite-system-entries)              ;      3
34  (delegate-system-entries)             ;      4
35  (public-entries)                      ;      5
36  (delegate-public-entries)             ;      6
37  (uri-entries)                         ;uri 2
38  (rewrite-uri-entries)                 ;    3
39  (delegate-uri-entries)                ;    4
40  (next-catalog-entries)                ;    5/7
41  )
42
43(defun starts-with-p (string prefix)
44  (let ((mismatch (mismatch string prefix)))
45    (or (null mismatch) (= mismatch (length prefix)))))
46
47(defun normalize-public (str)
48  (setf str (rod-to-utf8-string (rod str)))
49  (flet ((whitespacep (c)
50           (find c #.(map 'string #'code-char '(#x9 #xa #xd #x20)))))
51    (let ((start (position-if-not #'whitespacep str))
52          (end (position-if-not #'whitespacep str :from-end t))
53          (spacep nil))
54      (with-output-to-string (out)
55        (when start
56          (loop for i from start to end do
57                (let ((c (char str i)))
58                  (cond
59                    ((whitespacep c)
60                      (unless spacep
61                        (setf spacep t)
62                        (write-char #\space out)))
63                    (t
64                      (setf spacep nil)
65                      (write-char c out))))))))))
66
67(defun normalize-uri (str)
68  (when (typep str 'puri:uri)
69    (setf str (puri:render-uri str nil)))
70  (setf str (rod-to-utf8-string (rod str)))
71  (with-output-to-string (out)
72    (loop for ch across str do
73          (let ((c (char-code ch)))
74            (if (< c 15)
75                (write-string (string-upcase (format nil "%~2,'0X" c)) out)
76                (write-char ch out))))))
77
78(defun unwrap-publicid (str)
79  (normalize-public
80   (with-output-to-string (out)
81     (let ((i (length "urn:publicid:"))
82           (n (length str)))
83       (while (< i n)
84         (let ((c (char str i)))
85           (case c
86             (#\+ (write-char #\space out))
87             (#\: (write-string "//" out))
88             (#\; (write-string "::" out))
89             (#\%
90               (let ((code
91                      (parse-integer str
92                                     :start (+ i 1)
93                                     :end (+ i 3)
94                                     :radix 16)))
95                 (write-char (code-char code) out))
96               (incf i 2))
97             (t (write-char c out))))
98         (incf i))))))
99
100(defun match-exact (key table &optional check-prefer)
101  (dolist (pair table)
102    (destructuring-bind (from to &optional prefer) pair
103      (when (and (equal key from) (or (not check-prefer) (eq prefer :public)))
104        (return to)))))
105
106(defun match-prefix/rewrite (key table &optional check-prefer)
107  (let ((match nil)
108        (match-length -1))
109    (dolist (pair table)
110      (destructuring-bind (from to &optional prefer) pair
111        (when (and (or (not check-prefer) (eq prefer :public))
112                   (starts-with-p key from)
113                   (> (length from) match-length))
114          (setf match-length (length from))
115          (setf match to))))
116    (if match
117        (concatenate 'string
118          match
119          (subseq key match-length))
120        nil)))
121
122(defun match-prefix/sorted (key table &optional check-prefer)
123  (let ((result '()))
124    (dolist (pair table)
125      (destructuring-bind (from to &optional prefer) pair
126        (when (and (or (not check-prefer) (eq prefer :public))
127                   (starts-with-p key from))
128          (push (cons (length from) to) result))))
129    (mapcar #'cdr (sort result #'> :key #'car))))
130
131(defun resolve-extid (public system catalog)
132  (when public (setf public (normalize-public public)))
133  (when system (setf system (normalize-uri system)))
134  (when (and system (starts-with-p system "urn:publicid:"))
135    (let ((new-public (unwrap-publicid system)))
136      (assert (or (null public) (equal public new-public)))
137      (setf public new-public
138            system nil)))
139  (let ((files (catalog-main-files catalog))
140        (seen '()))
141    (while files
142      (let ((file (pop files))
143            (delegates nil))
144        (unless (typep file 'entry-file)
145          (setf file (find-catalog-file file catalog)))
146        (unless (or (null file) (member file seen))
147          (push file seen)
148          (when system
149            (let ((result
150                   (or (match-exact system (system-entries file))
151                       (match-prefix/rewrite
152                        system
153                        (rewrite-system-entries file)))))
154              (when result
155                (return result))
156              (setf delegates
157                    (match-prefix/sorted
158                     system
159                     (delegate-system-entries file)))))
160          (when (and public (not delegates))
161            (let* ((check-prefer (and system t))
162                   (result
163                    (match-exact public
164                                 (public-entries file)
165                                 check-prefer)))
166              (when result
167                (return result))
168              (setf delegates
169                    (match-prefix/sorted
170                     public
171                     (delegate-public-entries file)
172                     check-prefer))))
173          (if delegates
174              (setf files delegates)
175              (setf files (append (next-catalog-entries file) files))))))))
176
177(defun resolve-uri (uri catalog)
178  (setf uri (normalize-uri uri))
179  (when (starts-with-p uri "urn:publicid:")
180    (return-from resolve-uri
181      (resolve-extid (unwrap-publicid uri) nil catalog)))
182  (let ((files (catalog-main-files catalog))
183        (seen '()))
184    (while files
185      (let ((file (pop files)))
186        (unless (typep file 'entry-file)
187          (setf file (find-catalog-file file catalog)))
188        (unless (or (null file) (member file seen))
189          (push file seen)
190          (let ((result
191                 (or (match-exact uri (uri-entries file))
192                     (match-prefix/rewrite uri (rewrite-uri-entries file)))))
193            (when result
194              (return result))
195            (let* ((delegate-entries
196                    (delegate-uri-entries file))
197                   (delegates
198                    (match-prefix/sorted uri delegate-entries)))
199              (if delegates
200                  (setf files delegates)
201                  (setf files (append (next-catalog-entries file) files))))))))))
202
203(defun find-catalog-file (uri catalog)
204  (setf uri (if (stringp uri) (safe-parse-uri uri) uri))
205  (let* ((*dtd-cache* (catalog-dtd-cache catalog))
206         (*cache-all-dtds* t)
207         (file (parse-catalog-file uri)))
208    (when file
209      (let ((interned (puri:intern-uri uri (catalog-file-table catalog))))
210        (setf (getf (puri:uri-plist interned) 'catalog) file)))
211    file))
212
213(defun make-catalog (&optional (uris *default-catalog*))
214  (let ((result (%make-catalog)))
215    (setf (catalog-main-files result)
216          (loop
217              for uri in uris
218              for file = (find-catalog-file uri result)
219              when file collect file))
220    result))
221
222(defun parse-catalog-file (uri)
223  (handler-case
224      (parse-catalog-file/strict uri)
225    ((or file-error xml-parse-error) (c)
226      (warn "ignoring catalog error: ~A" c))))
227
228(defparameter *catalog-dtd*
229    (let* ((cxml
230            (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))
231           (dtd (merge-pathnames "catalog.dtd" cxml)))
232      (with-open-file (s dtd :element-type '(unsigned-byte 8))
233        (let ((bytes
234               (make-array (file-length s) :element-type '(unsigned-byte 8))))
235          (read-sequence bytes s)
236          bytes))))
237
238(defun parse-catalog-file/strict (uri)
239  (let* ((*catalog* nil)
240         (dtd-sysid
241          (puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
242    (flet ((entity-resolver (public system)
243             (declare (ignore public))
244             (if (puri:uri= system dtd-sysid)
245                 (make-octet-input-stream *catalog-dtd*)
246                 nil)))
247      (with-open-stream (s (open (uri-to-pathname uri)
248                                 :element-type '(unsigned-byte 8)
249                                 :direction :input))
250        (parse-stream s
251                      (make-instance 'catalog-parser :uri uri)
252                      :validate nil
253                      :dtd (make-extid nil dtd-sysid)
254                      :root #"catalog"
255                      :entity-resolver #'entity-resolver)))))
256
257(defclass catalog-parser (sax:default-handler)
258  ((result :initform (make-entry-file) :accessor result)
259   (next :initform '() :accessor next)
260   (prefer-stack :initform (list *prefer*) :accessor prefer-stack)
261   (catalog-base-stack :accessor catalog-base-stack)))
262
263(defmethod initialize-instance :after
264    ((instance catalog-parser) &key uri)
265  (setf (catalog-base-stack instance) (list uri)))
266
267(defmethod prefer ((handler catalog-parser))
268  (car (prefer-stack handler)))
269
270(defmethod base ((handler catalog-parser))
271  (car (catalog-base-stack handler)))
272
273(defun get-attribute/lname (name attributes)
274  (let ((a (find name attributes
275                 :key (lambda (a)
276                        (or (sax:attribute-local-name a)
277                            (sax:attribute-qname a)))
278                 :test #'string=)))
279    (and a (sax:attribute-value a))))
280
281(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs)
282  (declare (ignore uri))
283  (setf lname (or lname qname))
284  ;; we can dispatch on lnames only because we validate against the DTD,
285  ;; which disallows other namespaces.
286  ;; FIXME: we don't, because we can't.
287  (push (let ((new (get-attribute/lname "prefer" attrs)))
288          (cond
289            ((equal new "public") :public)
290            ((equal new "system") :system)
291            ((null new) (prefer handler))))
292        (prefer-stack handler))
293  (push (string-or (get-attribute/lname "base" attrs) (base handler))
294        (catalog-base-stack handler))
295  (flet ((geturi (lname)
296           (puri:merge-uris
297            (safe-parse-uri (get-attribute/lname lname attrs))
298            (base handler))))
299    (cond
300      ((string= lname "public")
301        (push (list (normalize-public (get-attribute/lname "publicId" attrs))
302                    (geturi "uri")
303                    (prefer handler))
304              (public-entries (result handler))))
305      ((string= lname "system")
306        (push (list (normalize-uri (get-attribute/lname "systemId" attrs))
307                    (geturi "uri"))
308              (system-entries (result handler))))
309      ((string= lname "uri")
310        (push (list (normalize-uri (get-attribute/lname "name" attrs))
311                    (geturi "uri"))
312              (uri-entries (result handler))))
313      ((string= lname "rewriteSystem")
314        (push (list (normalize-uri
315                     (get-attribute/lname "systemIdStartString" attrs))
316                    (get-attribute/lname "rewritePrefix" attrs))
317              (rewrite-system-entries (result handler))))
318      ((string= lname "rewriteURI")
319        (push (list (normalize-uri
320                     (get-attribute/lname "uriStartString" attrs))
321                    (get-attribute/lname "rewritePrefix" attrs))
322              (rewrite-uri-entries (result handler))))
323      ((string= lname "delegatePublic")
324        (push (list (normalize-public
325                     (get-attribute/lname "publicIdStartString" attrs))
326                    (geturi "catalog")
327                    (prefer handler))
328              (delegate-public-entries (result handler))))
329      ((string= lname "delegateSystem")
330        (push (list (normalize-uri
331                     (get-attribute/lname "systemIdStartString" attrs))
332                    (geturi "catalog"))
333              (delegate-system-entries (result handler))))
334      ((string= lname "delegateURI")
335        (push (list (normalize-uri
336                     (get-attribute/lname "uriStartString" attrs))
337                    (geturi "catalog"))
338              (delegate-uri-entries (result handler))))
339      ((string= lname "nextCatalog")
340        (push (geturi "catalog")
341              (next-catalog-entries (result handler)))))))
342
343(defmethod sax:end-element ((handler catalog-parser) uri lname qname)
344  (declare (ignore uri lname qname))
345  (pop (catalog-base-stack handler))
346  (pop (prefer-stack handler)))
347
348(defmethod sax:end-document ((handler catalog-parser))
349  (result handler))
350