1;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*- 2;;;; 3;;;; This file is part of the CXML parser, released under Lisp-LGPL. 4;;;; See file COPYING for details. 5;;;; 6;;;; Developed 2004 for headcraft - http://headcraft.de/ 7;;;; Copyright: David Lichteblau 8 9(cl:in-package #:cxml) 10 11;;; http://www.oasis-open.org/committees/entity/spec.html 12;;; 13;;; Bugs: 14;;; - We validate using the Catalog DTD while parsing, which is too strict 15;;; and will will fail to parse files using other parser's extensions. 16;;; (Jedenfalls behauptet das die Spec.) 17;;; A long-term solution might be an XML Schema validator. 18 19(defvar *prefer* :public) 20(defvar *default-catalog* 21 '(;; libxml standard 22 "/etc/xml/catalog" 23 ;; FreeBSD 24 "/usr/local/share/xml/catalog.ports")) 25 26(defstruct (catalog (:constructor %make-catalog ())) 27 main-files 28 (dtd-cache (make-dtd-cache)) 29 (file-table (puri:make-uri-space))) 30 31(defstruct (entry-file (:conc-name "")) 32 (system-entries) ;extid 2 33 (rewrite-system-entries) ; 3 34 (delegate-system-entries) ; 4 35 (public-entries) ; 5 36 (delegate-public-entries) ; 6 37 (uri-entries) ;uri 2 38 (rewrite-uri-entries) ; 3 39 (delegate-uri-entries) ; 4 40 (next-catalog-entries) ; 5/7 41 ) 42 43(defun starts-with-p (string prefix) 44 (let ((mismatch (mismatch string prefix))) 45 (or (null mismatch) (= mismatch (length prefix))))) 46 47(defun normalize-public (str) 48 (setf str (rod-to-utf8-string (rod str))) 49 (flet ((whitespacep (c) 50 (find c #.(map 'string #'code-char '(#x9 #xa #xd #x20))))) 51 (let ((start (position-if-not #'whitespacep str)) 52 (end (position-if-not #'whitespacep str :from-end t)) 53 (spacep nil)) 54 (with-output-to-string (out) 55 (when start 56 (loop for i from start to end do 57 (let ((c (char str i))) 58 (cond 59 ((whitespacep c) 60 (unless spacep 61 (setf spacep t) 62 (write-char #\space out))) 63 (t 64 (setf spacep nil) 65 (write-char c out)))))))))) 66 67(defun normalize-uri (str) 68 (when (typep str 'puri:uri) 69 (setf str (puri:render-uri str nil))) 70 (setf str (rod-to-utf8-string (rod str))) 71 (with-output-to-string (out) 72 (loop for ch across str do 73 (let ((c (char-code ch))) 74 (if (< c 15) 75 (write-string (string-upcase (format nil "%~2,'0X" c)) out) 76 (write-char ch out)))))) 77 78(defun unwrap-publicid (str) 79 (normalize-public 80 (with-output-to-string (out) 81 (let ((i (length "urn:publicid:")) 82 (n (length str))) 83 (while (< i n) 84 (let ((c (char str i))) 85 (case c 86 (#\+ (write-char #\space out)) 87 (#\: (write-string "//" out)) 88 (#\; (write-string "::" out)) 89 (#\% 90 (let ((code 91 (parse-integer str 92 :start (+ i 1) 93 :end (+ i 3) 94 :radix 16))) 95 (write-char (code-char code) out)) 96 (incf i 2)) 97 (t (write-char c out)))) 98 (incf i)))))) 99 100(defun match-exact (key table &optional check-prefer) 101 (dolist (pair table) 102 (destructuring-bind (from to &optional prefer) pair 103 (when (and (equal key from) (or (not check-prefer) (eq prefer :public))) 104 (return to))))) 105 106(defun match-prefix/rewrite (key table &optional check-prefer) 107 (let ((match nil) 108 (match-length -1)) 109 (dolist (pair table) 110 (destructuring-bind (from to &optional prefer) pair 111 (when (and (or (not check-prefer) (eq prefer :public)) 112 (starts-with-p key from) 113 (> (length from) match-length)) 114 (setf match-length (length from)) 115 (setf match to)))) 116 (if match 117 (concatenate 'string 118 match 119 (subseq key match-length)) 120 nil))) 121 122(defun match-prefix/sorted (key table &optional check-prefer) 123 (let ((result '())) 124 (dolist (pair table) 125 (destructuring-bind (from to &optional prefer) pair 126 (when (and (or (not check-prefer) (eq prefer :public)) 127 (starts-with-p key from)) 128 (push (cons (length from) to) result)))) 129 (mapcar #'cdr (sort result #'> :key #'car)))) 130 131(defun resolve-extid (public system catalog) 132 (when public (setf public (normalize-public public))) 133 (when system (setf system (normalize-uri system))) 134 (when (and system (starts-with-p system "urn:publicid:")) 135 (let ((new-public (unwrap-publicid system))) 136 (assert (or (null public) (equal public new-public))) 137 (setf public new-public 138 system nil))) 139 (let ((files (catalog-main-files catalog)) 140 (seen '())) 141 (while files 142 (let ((file (pop files)) 143 (delegates nil)) 144 (unless (typep file 'entry-file) 145 (setf file (find-catalog-file file catalog))) 146 (unless (or (null file) (member file seen)) 147 (push file seen) 148 (when system 149 (let ((result 150 (or (match-exact system (system-entries file)) 151 (match-prefix/rewrite 152 system 153 (rewrite-system-entries file))))) 154 (when result 155 (return result)) 156 (setf delegates 157 (match-prefix/sorted 158 system 159 (delegate-system-entries file))))) 160 (when (and public (not delegates)) 161 (let* ((check-prefer (and system t)) 162 (result 163 (match-exact public 164 (public-entries file) 165 check-prefer))) 166 (when result 167 (return result)) 168 (setf delegates 169 (match-prefix/sorted 170 public 171 (delegate-public-entries file) 172 check-prefer)))) 173 (if delegates 174 (setf files delegates) 175 (setf files (append (next-catalog-entries file) files)))))))) 176 177(defun resolve-uri (uri catalog) 178 (setf uri (normalize-uri uri)) 179 (when (starts-with-p uri "urn:publicid:") 180 (return-from resolve-uri 181 (resolve-extid (unwrap-publicid uri) nil catalog))) 182 (let ((files (catalog-main-files catalog)) 183 (seen '())) 184 (while files 185 (let ((file (pop files))) 186 (unless (typep file 'entry-file) 187 (setf file (find-catalog-file file catalog))) 188 (unless (or (null file) (member file seen)) 189 (push file seen) 190 (let ((result 191 (or (match-exact uri (uri-entries file)) 192 (match-prefix/rewrite uri (rewrite-uri-entries file))))) 193 (when result 194 (return result)) 195 (let* ((delegate-entries 196 (delegate-uri-entries file)) 197 (delegates 198 (match-prefix/sorted uri delegate-entries))) 199 (if delegates 200 (setf files delegates) 201 (setf files (append (next-catalog-entries file) files)))))))))) 202 203(defun find-catalog-file (uri catalog) 204 (setf uri (if (stringp uri) (safe-parse-uri uri) uri)) 205 (let* ((*dtd-cache* (catalog-dtd-cache catalog)) 206 (*cache-all-dtds* t) 207 (file (parse-catalog-file uri))) 208 (when file 209 (let ((interned (puri:intern-uri uri (catalog-file-table catalog)))) 210 (setf (getf (puri:uri-plist interned) 'catalog) file))) 211 file)) 212 213(defun make-catalog (&optional (uris *default-catalog*)) 214 (let ((result (%make-catalog))) 215 (setf (catalog-main-files result) 216 (loop 217 for uri in uris 218 for file = (find-catalog-file uri result) 219 when file collect file)) 220 result)) 221 222(defun parse-catalog-file (uri) 223 (handler-case 224 (parse-catalog-file/strict uri) 225 ((or file-error xml-parse-error) (c) 226 (warn "ignoring catalog error: ~A" c)))) 227 228(defparameter *catalog-dtd* 229 (let* ((cxml 230 (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname)) 231 (dtd (merge-pathnames "catalog.dtd" cxml))) 232 (with-open-file (s dtd :element-type '(unsigned-byte 8)) 233 (let ((bytes 234 (make-array (file-length s) :element-type '(unsigned-byte 8)))) 235 (read-sequence bytes s) 236 bytes)))) 237 238(defun parse-catalog-file/strict (uri) 239 (let* ((*catalog* nil) 240 (dtd-sysid 241 (puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd"))) 242 (flet ((entity-resolver (public system) 243 (declare (ignore public)) 244 (if (puri:uri= system dtd-sysid) 245 (make-octet-input-stream *catalog-dtd*) 246 nil))) 247 (with-open-stream (s (open (uri-to-pathname uri) 248 :element-type '(unsigned-byte 8) 249 :direction :input)) 250 (parse-stream s 251 (make-instance 'catalog-parser :uri uri) 252 :validate nil 253 :dtd (make-extid nil dtd-sysid) 254 :root #"catalog" 255 :entity-resolver #'entity-resolver))))) 256 257(defclass catalog-parser (sax:default-handler) 258 ((result :initform (make-entry-file) :accessor result) 259 (next :initform '() :accessor next) 260 (prefer-stack :initform (list *prefer*) :accessor prefer-stack) 261 (catalog-base-stack :accessor catalog-base-stack))) 262 263(defmethod initialize-instance :after 264 ((instance catalog-parser) &key uri) 265 (setf (catalog-base-stack instance) (list uri))) 266 267(defmethod prefer ((handler catalog-parser)) 268 (car (prefer-stack handler))) 269 270(defmethod base ((handler catalog-parser)) 271 (car (catalog-base-stack handler))) 272 273(defun get-attribute/lname (name attributes) 274 (let ((a (find name attributes 275 :key (lambda (a) 276 (or (sax:attribute-local-name a) 277 (sax:attribute-qname a))) 278 :test #'string=))) 279 (and a (sax:attribute-value a)))) 280 281(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs) 282 (declare (ignore uri)) 283 (setf lname (or lname qname)) 284 ;; we can dispatch on lnames only because we validate against the DTD, 285 ;; which disallows other namespaces. 286 ;; FIXME: we don't, because we can't. 287 (push (let ((new (get-attribute/lname "prefer" attrs))) 288 (cond 289 ((equal new "public") :public) 290 ((equal new "system") :system) 291 ((null new) (prefer handler)))) 292 (prefer-stack handler)) 293 (push (string-or (get-attribute/lname "base" attrs) (base handler)) 294 (catalog-base-stack handler)) 295 (flet ((geturi (lname) 296 (puri:merge-uris 297 (safe-parse-uri (get-attribute/lname lname attrs)) 298 (base handler)))) 299 (cond 300 ((string= lname "public") 301 (push (list (normalize-public (get-attribute/lname "publicId" attrs)) 302 (geturi "uri") 303 (prefer handler)) 304 (public-entries (result handler)))) 305 ((string= lname "system") 306 (push (list (normalize-uri (get-attribute/lname "systemId" attrs)) 307 (geturi "uri")) 308 (system-entries (result handler)))) 309 ((string= lname "uri") 310 (push (list (normalize-uri (get-attribute/lname "name" attrs)) 311 (geturi "uri")) 312 (uri-entries (result handler)))) 313 ((string= lname "rewriteSystem") 314 (push (list (normalize-uri 315 (get-attribute/lname "systemIdStartString" attrs)) 316 (get-attribute/lname "rewritePrefix" attrs)) 317 (rewrite-system-entries (result handler)))) 318 ((string= lname "rewriteURI") 319 (push (list (normalize-uri 320 (get-attribute/lname "uriStartString" attrs)) 321 (get-attribute/lname "rewritePrefix" attrs)) 322 (rewrite-uri-entries (result handler)))) 323 ((string= lname "delegatePublic") 324 (push (list (normalize-public 325 (get-attribute/lname "publicIdStartString" attrs)) 326 (geturi "catalog") 327 (prefer handler)) 328 (delegate-public-entries (result handler)))) 329 ((string= lname "delegateSystem") 330 (push (list (normalize-uri 331 (get-attribute/lname "systemIdStartString" attrs)) 332 (geturi "catalog")) 333 (delegate-system-entries (result handler)))) 334 ((string= lname "delegateURI") 335 (push (list (normalize-uri 336 (get-attribute/lname "uriStartString" attrs)) 337 (geturi "catalog")) 338 (delegate-uri-entries (result handler)))) 339 ((string= lname "nextCatalog") 340 (push (geturi "catalog") 341 (next-catalog-entries (result handler))))))) 342 343(defmethod sax:end-element ((handler catalog-parser) uri lname qname) 344 (declare (ignore uri lname qname)) 345 (pop (catalog-base-stack handler)) 346 (pop (prefer-stack handler))) 347 348(defmethod sax:end-document ((handler catalog-parser)) 349 (result handler)) 350