1;;;; READ and friends 2 3;;;; This software is part of the SBCL system. See the README file for 4;;;; more information. 5;;;; 6;;;; This software is derived from the CMU CL system, which was 7;;;; written at Carnegie Mellon University and released into the 8;;;; public domain. The software is in the public domain and is 9;;;; provided with absolutely no warranty. See the COPYING and CREDITS 10;;;; files for more information. 11 12(in-package "SB!IMPL") 13 14;;;; miscellaneous global variables 15 16;;; ANSI: "the floating-point format that is to be used when reading a 17;;; floating-point number that has no exponent marker or that has e or 18;;; E for an exponent marker" 19(!defvar *read-default-float-format* 'single-float) 20(declaim (type (member short-float single-float double-float long-float) 21 *read-default-float-format*)) 22 23(defvar *readtable*) 24(declaim (type readtable *readtable*)) 25#!+sb-doc 26(setf (fdocumentation '*readtable* 'variable) 27 "Variable bound to current readtable.") 28 29;;; A standard Lisp readtable (once cold-init is through). This is for 30;;; recovery from broken read-tables (and for 31;;; WITH-STANDARD-IO-SYNTAX), and should not normally be user-visible. 32;;; If the initial value is changed from NIL to something more interesting, 33;;; be sure to update the duplicated definition in "src/code/print.lisp" 34(defglobal *standard-readtable* nil) 35 36;;; In case we get an error trying to parse a symbol, we want to rebind the 37;;; above stuff so it's cool. 38 39 40;;;; reader errors 41 42(defun reader-eof-error (stream context) 43 (declare (optimize allow-non-returning-tail-call)) 44 (error 'reader-eof-error 45 :stream stream 46 :context context)) 47 48;;; If The Gods didn't intend for us to use multiple namespaces, why 49;;; did They specify them? 50(defun simple-reader-error (stream control &rest args) 51 (declare (optimize allow-non-returning-tail-call)) 52 (error 'simple-reader-error 53 :stream stream 54 :format-control control 55 :format-arguments args)) 56 57;;;; macros and functions for character tables 58 59(declaim (ftype (sfunction (character readtable) (unsigned-byte 8)) 60 get-cat-entry)) 61(defun get-cat-entry (char rt) 62 (if (typep char 'base-char) 63 (elt (character-attribute-array rt) (char-code char)) 64 (values (gethash char (character-attribute-hash-table rt) 65 +char-attr-constituent+)))) 66 67(defun set-cat-entry (char newvalue &optional (rt *readtable*)) 68 (declare (character char) (type (unsigned-byte 8) newvalue) (readtable rt)) 69 (if (typep char 'base-char) 70 (setf (elt (character-attribute-array rt) (char-code char)) newvalue) 71 (if (= newvalue +char-attr-constituent+) 72 ;; Default value for the C-A-HASH-TABLE is +CHAR-ATTR-CONSTITUENT+. 73 (%remhash char (character-attribute-hash-table rt)) 74 (setf (gethash char (character-attribute-hash-table rt)) newvalue))) 75 (values)) 76 77;; Set the character-macro-table entry without coercing NEW-VALUE. 78;; As used by set-syntax-from-char it must always process "raw" values. 79(defun set-cmt-entry (char new-value &optional (rt *readtable*)) 80 (declare (character char) 81 (type (or null function fdefn) new-value) 82 (type readtable rt)) 83 (if (typep char 'base-char) 84 (setf (svref (character-macro-array rt) (char-code char)) new-value) 85 (if new-value ; never store NILs 86 (setf (gethash char (character-macro-hash-table rt)) new-value) 87 (remhash char (character-macro-hash-table rt))))) 88 89;;; the value actually stored in the character macro table. As per 90;;; ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, this can 91;;; be either a function-designator or NIL, except that we store 92;;; symbols not as themselves but as their #<fdefn>. 93(defun get-raw-cmt-entry (char readtable) 94 (declare (character char) (readtable readtable)) 95 (if (typep char 'base-char) 96 (svref (character-macro-array readtable) (char-code char)) 97 (values (gethash char (character-macro-hash-table readtable) nil)))) 98 99;; As above but get the entry for SUB-CHAR in a dispatching macro table. 100(defun get-raw-cmt-dispatch-entry (sub-char sub-table) 101 (declare (character sub-char)) 102 (if (typep sub-char 'base-char) 103 (svref (truly-the (simple-vector #.base-char-code-limit) 104 (cdr (truly-the cons sub-table))) 105 (char-code sub-char)) 106 (awhen (car sub-table) 107 (gethash sub-char it)))) 108 109;; Coerce THING to a character-macro-table entry 110(defmacro !coerce-to-cmt-entry (thing) 111 `(let ((x ,thing)) 112 (if (typep x '(or null function)) x (find-or-create-fdefn x)))) 113 114;; Return a callable function given a character-macro-table entry. 115(defmacro !cmt-entry-to-function (val fallback) 116 `(let ((x ,val)) 117 (truly-the 118 function 119 (cond ((functionp x) x) 120 ((null x) ,fallback) 121 (t (sb!c:safe-fdefn-fun x)))))) 122 123;; Return a function-designator given a character-macro-table entry. 124(defmacro !cmt-entry-to-fun-designator (val) 125 `(let ((x ,val)) 126 (if (fdefn-p x) (fdefn-name x) x))) 127 128;;; The character attribute table is a BASE-CHAR-CODE-LIMIT vector 129;;; of (unsigned-byte 8) plus a hashtable to handle higher character codes. 130 131(defmacro test-attribute (char whichclass rt) 132 `(= (get-cat-entry ,char ,rt) ,whichclass)) 133 134;;; predicates for testing character attributes 135 136#!-sb-fluid 137(progn 138 (declaim (inline whitespace[1]p whitespace[2]p)) 139 (declaim (inline constituentp terminating-macrop)) 140 (declaim (inline single-escape-p multiple-escape-p)) 141 (declaim (inline token-delimiterp))) 142 143;;; the [1] and [2] here refer to ANSI glossary entries for 144;;; "whitespace". 145;; whitespace[2]p is the only predicate whose readtable is optional 146;; - other than whitespace[1]p which has a fixed readtable - due to 147;; callers not otherwise needing a readtable at all, and so not binding 148;; *READTABLE* into a local variable throughout their lifetime. 149(defun whitespace[1]p (char) 150 (test-attribute char +char-attr-whitespace+ *standard-readtable*)) 151(defun whitespace[2]p (char &optional (rt *readtable*)) 152 (test-attribute char +char-attr-whitespace+ rt)) 153 154(defun constituentp (char rt) 155 (test-attribute char +char-attr-constituent+ rt)) 156 157(defun terminating-macrop (char rt) 158 (test-attribute char +char-attr-terminating-macro+ rt)) 159 160(defun single-escape-p (char rt) 161 (test-attribute char +char-attr-single-escape+ rt)) 162 163(defun multiple-escape-p (char rt) 164 (test-attribute char +char-attr-multiple-escape+ rt)) 165 166(defun token-delimiterp (char &optional (rt *readtable*)) 167 ;; depends on actual attribute numbering in readtable.lisp. 168 (<= (get-cat-entry char rt) +char-attr-terminating-macro+)) 169 170;;;; constituent traits (see ANSI 2.1.4.2) 171 172;;; There are a number of "secondary" attributes which are constant 173;;; properties of characters (as long as they are constituents). 174 175(declaim (type attribute-table *constituent-trait-table*)) 176(defglobal *constituent-trait-table* 177 (make-array base-char-code-limit 178 :element-type '(unsigned-byte 8) 179 :initial-element +char-attr-constituent+)) 180 181(defun !set-constituent-trait (char trait) 182 (aver (typep char 'base-char)) 183 (setf (elt *constituent-trait-table* (char-code char)) 184 trait)) 185 186(defun !cold-init-constituent-trait-table () 187 (!set-constituent-trait #\: +char-attr-package-delimiter+) 188 (!set-constituent-trait #\. +char-attr-constituent-dot+) 189 (!set-constituent-trait #\+ +char-attr-constituent-sign+) 190 (!set-constituent-trait #\- +char-attr-constituent-sign+) 191 (!set-constituent-trait #\/ +char-attr-constituent-slash+) 192 (do ((i (char-code #\0) (1+ i))) 193 ((> i (char-code #\9))) 194 (!set-constituent-trait (code-char i) +char-attr-constituent-digit+)) 195 (!set-constituent-trait #\E +char-attr-constituent-expt+) 196 (!set-constituent-trait #\F +char-attr-constituent-expt+) 197 (!set-constituent-trait #\D +char-attr-constituent-expt+) 198 (!set-constituent-trait #\S +char-attr-constituent-expt+) 199 (!set-constituent-trait #\L +char-attr-constituent-expt+) 200 (!set-constituent-trait #\e +char-attr-constituent-expt+) 201 (!set-constituent-trait #\f +char-attr-constituent-expt+) 202 (!set-constituent-trait #\d +char-attr-constituent-expt+) 203 (!set-constituent-trait #\s +char-attr-constituent-expt+) 204 (!set-constituent-trait #\l +char-attr-constituent-expt+) 205 (!set-constituent-trait #\Space +char-attr-invalid+) 206 (!set-constituent-trait #\Newline +char-attr-invalid+) 207 (dolist (c (list backspace-char-code tab-char-code form-feed-char-code 208 return-char-code rubout-char-code)) 209 (!set-constituent-trait (code-char c) +char-attr-invalid+))) 210 211(declaim (inline get-constituent-trait)) 212(defun get-constituent-trait (char) 213 (if (typep char 'base-char) 214 (elt *constituent-trait-table* (char-code char)) 215 +char-attr-constituent+)) 216 217;;;; Readtable Operations 218 219(defun assert-not-standard-readtable (readtable operation) 220 (when (eq readtable *standard-readtable*) 221 (cerror "Frob it anyway!" 'standard-readtable-modified-error 222 :operation operation))) 223 224(defun readtable-case (readtable) 225 (%readtable-case readtable)) 226 227(defun (setf readtable-case) (case readtable) 228 ;; This function does not accept a readtable designator, only a readtable. 229 (assert-not-standard-readtable readtable '(setf readtable-case)) 230 (setf (%readtable-case readtable) case)) 231 232(defun readtable-normalization (readtable) 233 #!+sb-doc 234 "Returns T if READTABLE normalizes strings to NFKC, and NIL otherwise. 235The READTABLE-NORMALIZATION of the standard readtable is T." 236 (%readtable-normalization readtable)) 237 238(defun (setf readtable-normalization) (new-value readtable) 239 #!+sb-doc 240 "Sets the READTABLE-NORMALIZATION of the given READTABLE to NEW-VALUE. 241Pass T to make READTABLE normalize symbols to NFKC (the default behavior), 242and NIL to suppress normalization." 243 ;; This function does not accept a readtable designator, only a readtable. 244 (assert-not-standard-readtable readtable '(setf readtable-normalization)) 245 (setf (%readtable-normalization readtable) new-value)) 246 247(defun replace/eql-hash-table (to from &optional (transform #'identity)) 248 (maphash (lambda (k v) (setf (gethash k to) (funcall transform v))) from) 249 to) 250 251(defun %make-dispatch-macro-char (dtable) 252 (lambda (stream char) 253 (declare (ignore char)) 254 (read-dispatch-char stream dtable))) 255 256(defun %dispatch-macro-char-table (fun) 257 (and (closurep fun) 258 (eq (%closure-fun fun) 259 (load-time-value (%closure-fun (%make-dispatch-macro-char nil)) 260 t)) 261 (find-if-in-closure #'consp fun))) 262 263;; If ENTRY is a dispatching macro, copy its dispatch table. 264;; Otherwise return it without alteration. 265(defun copy-cmt-entry (entry) 266 (let ((dtable (%dispatch-macro-char-table entry))) 267 (if dtable 268 (%make-dispatch-macro-char 269 (cons (awhen (car dtable) 270 (replace/eql-hash-table (make-hash-table) it)) 271 (copy-seq (cdr dtable)))) 272 entry))) 273 274(defun copy-readtable (&optional (from-readtable *readtable*) to-readtable) 275 #!+sb-doc 276 "Copies FROM-READTABLE and returns the result. Uses TO-READTABLE as a target 277for the copy when provided, otherwise a new readtable is created. The 278FROM-READTABLE defaults to the standard readtable when NIL and to the current 279readtable when not provided." 280 (assert-not-standard-readtable to-readtable 'copy-readtable) 281 (let ((really-from-readtable (or from-readtable *standard-readtable*)) 282 (really-to-readtable (or to-readtable (make-readtable)))) 283 (replace (character-attribute-array really-to-readtable) 284 (character-attribute-array really-from-readtable)) 285 (replace/eql-hash-table 286 (clrhash (character-attribute-hash-table really-to-readtable)) 287 (character-attribute-hash-table really-from-readtable)) 288 (map-into (character-macro-array really-to-readtable) 289 #'copy-cmt-entry 290 (character-macro-array really-from-readtable)) 291 (replace/eql-hash-table 292 (clrhash (character-macro-hash-table really-to-readtable)) 293 (character-macro-hash-table really-from-readtable) 294 #'copy-cmt-entry) 295 (setf (readtable-case really-to-readtable) 296 (readtable-case really-from-readtable)) 297 (setf (readtable-normalization really-to-readtable) 298 (readtable-normalization really-from-readtable)) 299 really-to-readtable)) 300 301(defun set-syntax-from-char (to-char from-char &optional 302 (to-readtable *readtable*) (from-readtable nil)) 303 #!+sb-doc 304 "Causes the syntax of TO-CHAR to be the same as FROM-CHAR in the optional 305readtable (defaults to the current readtable). The FROM-TABLE defaults to the 306standard Lisp readtable when NIL." 307 ;; TO-READTABLE is a readtable, not a readtable-designator 308 (assert-not-standard-readtable to-readtable 'set-syntax-from-char) 309 (let* ((really-from-readtable (or from-readtable *standard-readtable*)) 310 (att (get-cat-entry from-char really-from-readtable)) 311 (mac (get-raw-cmt-entry from-char really-from-readtable))) 312 (set-cat-entry to-char att to-readtable) 313 (set-cmt-entry to-char (copy-cmt-entry mac) to-readtable)) 314 t) 315 316(defun set-macro-character (char function &optional 317 (non-terminatingp nil) 318 (rt-designator *readtable*)) 319 #!+sb-doc 320 "Causes CHAR to be a macro character which invokes FUNCTION when seen 321 by the reader. The NON-TERMINATINGP flag can be used to make the macro 322 character non-terminating, i.e. embeddable in a symbol name." 323 (let ((designated-readtable (or rt-designator *standard-readtable*))) 324 (assert-not-standard-readtable designated-readtable 'set-macro-character) 325 (set-cat-entry char (if non-terminatingp 326 +char-attr-constituent+ 327 +char-attr-terminating-macro+) 328 designated-readtable) 329 (set-cmt-entry char (!coerce-to-cmt-entry function) designated-readtable) 330 t)) ; (ANSI-specified return value) 331 332(defun get-macro-character (char &optional (rt-designator *readtable*)) 333 #!+sb-doc 334 "Return the function associated with the specified CHAR which is a macro 335 character, or NIL if there is no such function. As a second value, return 336 T if CHAR is a macro character which is non-terminating, i.e. which can 337 be embedded in a symbol name." 338 (let* ((designated-readtable (or rt-designator *standard-readtable*)) 339 ;; the first return value: (OR FUNCTION SYMBOL) if CHAR is a macro 340 ;; character, or NIL otherwise 341 (fun-value (!cmt-entry-to-fun-designator 342 (get-raw-cmt-entry char designated-readtable)))) 343 (values fun-value 344 ;; NON-TERMINATING-P return value: 345 (if fun-value 346 (or (constituentp char designated-readtable) 347 (not (terminating-macrop char designated-readtable))) 348 ;; ANSI's definition of GET-MACRO-CHARACTER says this 349 ;; value is NIL when CHAR is not a macro character. 350 ;; I.e. this value means not just "non-terminating 351 ;; character?" but "non-terminating macro character?". 352 nil)))) 353 354(defun get-dispatch-macro-char-table (disp-char readtable &optional (errorp t)) 355 (cond ((%dispatch-macro-char-table (get-raw-cmt-entry disp-char readtable))) 356 (errorp (error "~S is not a dispatching macro character." disp-char)))) 357 358(defun make-dispatch-macro-character (char &optional 359 (non-terminating-p nil) 360 (rt *readtable*)) 361 #!+sb-doc 362 "Cause CHAR to become a dispatching macro character in readtable (which 363 defaults to the current readtable). If NON-TERMINATING-P, the char will 364 be non-terminating." 365 ;; This used to call ERROR if the character was already a dispatching 366 ;; macro but I saw no evidence of that in other implementations except cmucl. 367 ;; Without a portable way to inquire whether a character is dispatching, 368 ;; a file that frobs *READTABLE* can't be repeatedly loaded except 369 ;; by catching the error, so I removed it. 370 ;; RT is a readtable, not a readtable-designator, as per CLHS. 371 (unless (get-dispatch-macro-char-table char rt nil) 372 ;; The dtable is a cons whose whose CAR is initially NIL but upgraded 373 ;; to a hashtable if required, and whose CDR is a vector indexed by 374 ;; char-code up to the maximum base-char. 375 (let ((dtable (cons nil (make-array base-char-code-limit 376 :initial-element nil)))) 377 (set-macro-character char (%make-dispatch-macro-char dtable) 378 non-terminating-p rt))) 379 t) 380 381(defun set-dispatch-macro-character (disp-char sub-char function 382 &optional (rt-designator *readtable*)) 383 #!+sb-doc 384 "Cause FUNCTION to be called whenever the reader reads DISP-CHAR 385 followed by SUB-CHAR." 386 ;; Get the dispatch char for macro (error if not there), diddle 387 ;; entry for sub-char. 388 (let* ((sub-char (char-upcase sub-char)) 389 (readtable (or rt-designator *standard-readtable*))) 390 (assert-not-standard-readtable readtable 'set-dispatch-macro-character) 391 (when (digit-char-p sub-char) 392 (error "SUB-CHAR must not be a decimal digit: ~S" sub-char)) 393 (let ((dtable (get-dispatch-macro-char-table disp-char readtable)) 394 (function (!coerce-to-cmt-entry function))) 395 ;; (SET-MACRO-CHARACTER #\$ (GET-MACRO-CHARACTER #\#)) will share 396 ;; the dispatch table. Perhaps it should be copy-on-write? 397 (if (typep sub-char 'base-char) 398 (setf (svref (cdr dtable) (char-code sub-char)) function) 399 (let ((hashtable (car dtable))) 400 (cond (function ; allocate the hashtable if it wasn't made yet 401 (setf (gethash sub-char 402 (or hashtable (setf (car dtable) 403 (make-hash-table)))) 404 function)) 405 (hashtable ; remove an existing entry 406 (remhash sub-char hashtable))))))) 407 t) 408 409(defun get-dispatch-macro-character (disp-char sub-char 410 &optional (rt-designator *readtable*)) 411 #!+sb-doc 412 "Return the macro character function for SUB-CHAR under DISP-CHAR 413 or NIL if there is no associated function." 414 (let ((dtable (get-dispatch-macro-char-table 415 disp-char (or rt-designator *standard-readtable*)))) 416 (!cmt-entry-to-fun-designator 417 (get-raw-cmt-dispatch-entry (char-upcase sub-char) dtable)))) 418 419 420;;;; definitions to support internal programming conventions 421 422(defconstant +EOF+ 0) 423 424(defun flush-whitespace (stream) 425 ;; This flushes whitespace chars, returning the last char it read (a 426 ;; non-white one). It always gets an error on end-of-file. 427 (let* ((stream (in-synonym-of stream)) 428 (rt *readtable*) 429 (attribute-array (character-attribute-array rt)) 430 (attribute-hash-table (character-attribute-hash-table rt))) 431 (macrolet ((done-p () 432 '(not (eql (if (typep char 'base-char) 433 (aref attribute-array (char-code char)) 434 (gethash char attribute-hash-table 435 +char-attr-constituent+)) 436 +char-attr-whitespace+)))) 437 (if (ansi-stream-p stream) 438 (prepare-for-fast-read-char stream 439 (loop (let ((char (fast-read-char t))) 440 (cond ((done-p) 441 (done-with-fast-read-char) 442 (return char)))))) 443 ;; CLOS stream 444 (loop (let ((char (read-char stream nil +EOF+))) 445 ;; (THE) should not be needed if DONE-P, but it was not 446 ;; being derived to return a character, causing an extra 447 ;; check in consumers of flush-whitespace despite the 448 ;; promise to return a character or else signal EOF. 449 (cond ((eq char +EOF+) (error 'end-of-file :stream stream)) 450 ((done-p) (return (the character char)))))))))) 451 452;;;; temporary initialization hack 453 454;; Install the (easy) standard macro-chars into *READTABLE*. 455(defun !cold-init-standard-readtable () 456 (/show0 "entering !cold-init-standard-readtable") 457 ;; All characters get boring defaults in MAKE-READTABLE. Now we 458 ;; override the boring defaults on characters which need more 459 ;; interesting behavior. 460 (flet ((whitespaceify (char) 461 (set-cmt-entry char nil) 462 (set-cat-entry char +char-attr-whitespace+))) 463 (whitespaceify (code-char tab-char-code)) 464 (whitespaceify #\Newline) 465 (whitespaceify #\Space) 466 (whitespaceify (code-char form-feed-char-code)) 467 (whitespaceify (code-char return-char-code))) 468 469 (set-cat-entry #\\ +char-attr-single-escape+) 470 (set-cmt-entry #\\ nil) 471 472 (set-cat-entry #\| +char-attr-multiple-escape+) 473 (set-cmt-entry #\| nil) 474 475 ;; Easy macro-character definitions are in this source file. 476 (set-macro-character #\" #'read-string) 477 (set-macro-character #\' #'read-quote) 478 ;; Using symbols makes these traceable and redefineable with ease, 479 ;; as well as avoids a forward-referenced function (from "backq") 480 (set-macro-character #\( 'read-list) 481 (set-macro-character #\) 'read-right-paren) 482 (set-macro-character #\; #'read-comment) 483 ;; (The hairier macro-character definitions, for #\# and #\`, are 484 ;; defined elsewhere, in their own source files.) 485 486 ;; all constituents 487 (do ((ichar 0 (1+ ichar)) 488 (char)) 489 ((= ichar base-char-code-limit)) 490 (setq char (code-char ichar)) 491 (when (constituentp char *readtable*) 492 (set-cmt-entry char nil))) 493 494 (/show0 "leaving !cold-init-standard-readtable")) 495 496;;;; implementation of the read buffer 497 498(defstruct (token-buf (:predicate nil) (:copier nil) 499 (:constructor 500 make-token-buf 501 (&aux 502 (initial-string (make-string 128)) 503 (string initial-string) 504 (adjustable-string 505 (make-array 0 506 :element-type 'character 507 :fill-pointer nil 508 :displaced-to string))))) 509 ;; The string accumulated during reading of tokens. 510 ;; Always starts out EQ to 'initial-string'. 511 (string nil :type (simple-array character (*))) 512 ;; Counter advanced as characters are placed into 'string' 513 (fill-ptr 0 :type index) 514 ;; Counter advanced as characters are consumed from 'string' on re-scan 515 ;; by auxilliary functions MAKE-{INTEGER,FLOAT,RATIONAL} etc. 516 (cursor 0 :type index) 517 ;; A string used only for FIND-PACKAGE calls in package-qualified 518 ;; symbols so that we don't need to call SUBSEQ on the 'string'. 519 (adjustable-string nil :type (and (array character (*)) (not simple-array))) 520 ;; A small string that is permanently assigned into this token-buf. 521 (initial-string nil :type (simple-array character (128)) 522 :read-only t) 523 (escapes (make-array 10 :element-type 'fixnum :fill-pointer 0 :adjustable t) 524 :type (and (vector fixnum) (not simple-array)) :read-only t) 525 ;; Link to next TOKEN-BUF, to chain the *TOKEN-BUF-POOL* together. 526 (next nil :type (or null token-buf)) 527 (only-base-chars t :type boolean)) 528(declaim (freeze-type token-buf)) 529 530(defmethod print-object ((self token-buf) stream) 531 (print-unreadable-object (self stream :identity t :type t) 532 (format stream "~@[next=~S~]" (token-buf-next self)))) 533 534;; The current TOKEN-BUF 535(declaim (type token-buf *read-buffer*)) 536(defvar *read-buffer*) 537 538;; A list of available TOKEN-BUFs 539;; Should need no toplevel binding if multi-threaded, 540;; but doesn't really matter, as INITIAL-THREAD-FUNCTION-TRAMPOLINE 541;; rebinds to NIL. 542(declaim (type (or null token-buf) *token-buf-pool*)) 543(defvar *token-buf-pool* nil) 544 545(defun reset-read-buffer (buffer) 546 ;; Turn BUFFER into an empty read buffer. 547 (setf (fill-pointer (token-buf-escapes buffer)) 0) 548 (setf (token-buf-fill-ptr buffer) 0) 549 (setf (token-buf-cursor buffer) 0) 550 (setf (token-buf-only-base-chars buffer) t) 551 buffer) 552 553;; "Output" a character into the reader's buffer. 554(declaim (inline ouch-read-buffer)) 555(defun ouch-read-buffer (char buffer) 556 ;; When buffer overflow 557 (let ((op (token-buf-fill-ptr buffer))) 558 (declare (optimize (sb!c::insert-array-bounds-checks 0))) 559 (when (>= op (length (token-buf-string buffer))) 560 ;; an out-of-line call for the uncommon case avoids bloat. 561 ;; Size should be doubled. 562 (grow-read-buffer)) 563 (unless (typep char 'base-char) 564 (setf (token-buf-only-base-chars buffer) nil)) 565 (setf (elt (token-buf-string buffer) op) char) 566 (setf (token-buf-fill-ptr buffer) (1+ op)))) 567 568(defun ouch-read-buffer-escaped (char buf) 569 (vector-push-extend (token-buf-fill-ptr buf) (token-buf-escapes buf)) 570 (ouch-read-buffer char buf)) 571 572(defun grow-read-buffer () 573 (let* ((b *read-buffer*) 574 (string (token-buf-string b))) 575 (setf (token-buf-string b) 576 (replace (make-string (* 2 (length string))) string)))) 577 578;; Retun the next character from the buffered token, or NIL. 579(declaim (maybe-inline token-buf-getchar)) 580(defun token-buf-getchar (b) 581 (declare (optimize (sb!c::insert-array-bounds-checks 0))) 582 (let ((i (token-buf-cursor (truly-the token-buf b)))) 583 (and (< i (token-buf-fill-ptr b)) 584 (prog1 (elt (token-buf-string b) i) 585 (setf (token-buf-cursor b) (1+ i)))))) 586 587;; Grab a buffer off the token-buf pool if there is one, or else make one. 588;; This does not need to be protected against other threads because the 589;; pool is thread-local, or against async interrupts. An async signal 590;; delivered anywhere in the midst of the code sequence below can not 591;; corrupt the buffer given to the caller of ACQUIRE-TOKEN-BUF. 592;; Additionally the cleanup is on a "best effort" basis. Async unwinds 593;; through WITH-READ-BUFFER fail to recycle token-bufs, but that's ok. 594(defun acquire-token-buf () 595 (let ((this-buffer *token-buf-pool*)) 596 (cond (this-buffer 597 (shiftf *token-buf-pool* (token-buf-next this-buffer) nil) 598 this-buffer) 599 (t 600 (make-token-buf))))) 601 602(defun release-token-buf (chain) 603 (named-let free ((buffer chain)) 604 ;; If 'adjustable-string' was displaced to 'string', 605 ;; adjust it back down to allow GC of the abnormally large string. 606 (unless (eq (%array-data-vector (token-buf-adjustable-string buffer)) 607 (token-buf-initial-string buffer)) 608 (adjust-array (token-buf-adjustable-string buffer) '(0) 609 :displaced-to (token-buf-initial-string buffer))) 610 ;; 'initial-string' is assigned into 'string' 611 ;; so not to preserve huge buffers in the pool indefinitely. 612 (setf (token-buf-string buffer) (token-buf-initial-string buffer)) 613 (if (token-buf-next buffer) 614 (free (token-buf-next buffer)) 615 (setf (token-buf-next buffer) *token-buf-pool*))) 616 (setf *token-buf-pool* chain)) 617 618;; Return a fresh copy of BUFFER's string 619(defun copy-token-buf-string (buffer) 620 (subseq (token-buf-string buffer) 0 (token-buf-fill-ptr buffer))) 621 622;; Return a string displaced to BUFFER's string. 623;; The string should not be held onto - either a copy must be made 624;; by the receiver, or it should be parsed into something else. 625(defun sized-token-buf-string (buffer) 626 ;; It would in theory be faster to make the adjustable array have 627 ;; a fill-pointer, and just set that most of the time. Except we still 628 ;; need the ability to displace to a different string if a package name 629 ;; has >128 characters, so then there'd be two modes of sharing, one of 630 ;; which is rarely exercised and most likely to be subtly wrong. 631 ;; At any rate, SET-ARRAY-HEADER is faster than ADJUST-ARRAY. 632 ;; TODO: find evidence that it is/is-not worth having complicated 633 ;; mechanism involving a fill-pointer or not. 634 (set-array-header 635 (token-buf-adjustable-string buffer) ; the array 636 (token-buf-string buffer) ; the underlying data 637 (token-buf-fill-ptr buffer) ; total size 638 nil ; fill-pointer 639 0 ; displacement 640 (token-buf-fill-ptr buffer) ; dimension 0 641 t nil)) ; displacedp / newp 642 643;; Acquire a TOKEN-BUF from the pool and execute the body, returning only 644;; the primary value therefrom. Recycle the buffer when done. 645;; No UNWIND-PROTECT - recycling is designed to help with the common case 646;; of normal return and is not intended to be resilient against nonlocal exit. 647(defmacro with-read-buffer (() &body body) 648 `(let* ((*read-buffer* (acquire-token-buf)) 649 (result (progn ,@body))) 650 (release-token-buf *read-buffer*) 651 result)) 652 653(defun check-for-recursive-read (stream recursive-p operator-name) 654 (when (and recursive-p (not (boundp '*read-buffer*))) 655 (simple-reader-error 656 stream 657 "~A was invoked with RECURSIVE-P being true outside ~ 658 of a recursive read operation." 659 `(,operator-name)))) 660 661;;;; READ-PRESERVING-WHITESPACE, READ-DELIMITED-LIST, and READ 662 663;;; A list for #=, used to keep track of objects with labels assigned that 664;;; have been completely read. Each entry is a SHARP-EQUAL-WRAPPER object. 665;;; 666;;; KLUDGE: Should this really be a list? It seems as though users 667;;; could reasonably expect N log N performance for large datasets. 668;;; On the other hand, it's probably very very seldom a problem in practice. 669;;; On the third hand, it might be just as easy to use a hash table, 670;;; so maybe we should. -- WHN 19991202 671(defvar *sharp-equal* ()) 672 673(declaim (ftype (sfunction (t t) (values bit t)) read-maybe-nothing)) 674 675;;; Like READ-PRESERVING-WHITESPACE, but doesn't check the read buffer 676;;; for being set up properly. 677(defun %read-preserving-whitespace (stream eof-error-p eof-value recursive-p) 678 (declare (optimize (sb!c::check-tag-existence 0))) 679 (if recursive-p 680 ;; a loop for repeating when a macro returns nothing 681 (let* ((tracking-p (form-tracking-stream-p stream)) 682 (outermost-p 683 (and tracking-p 684 (null (form-tracking-stream-form-start-char-pos stream))))) 685 (loop 686 (let ((char (read-char stream eof-error-p +EOF+))) 687 (cond ((eq char +EOF+) (return eof-value)) 688 ((whitespace[2]p char)) 689 (t 690 (when outermost-p 691 ;; Calling FILE-POSITION at each token seems to slow down 692 ;; the reader by somewhere between 8x to 10x. 693 ;; Once per outermost form is acceptably fast though. 694 (setf (form-tracking-stream-form-start-byte-pos stream) 695 ;; pretend we queried the position before reading CHAR 696 (- (file-position stream) 697 (or (file-string-length stream (string char)) 0)) 698 (form-tracking-stream-form-start-char-pos stream) 699 ;; likewise 700 (1- (form-tracking-stream-input-char-pos stream)))) 701 (multiple-value-bind (result-p result) 702 (read-maybe-nothing stream char) 703 (unless (zerop result-p) 704 (return (unless *read-suppress* result))) 705 ;; Repeat if macro returned nothing. 706 (when tracking-p 707 (funcall (form-tracking-stream-observer stream) 708 :reset nil nil)))))))) 709 (let ((*sharp-equal* nil)) 710 (with-read-buffer () 711 (%read-preserving-whitespace stream eof-error-p eof-value t))))) 712 713;;; READ-PRESERVING-WHITESPACE behaves just like READ, only it makes 714;;; sure to leave terminating whitespace in the stream. (This is a 715;;; COMMON-LISP exported symbol.) 716(defun read-preserving-whitespace (&optional (stream *standard-input*) 717 (eof-error-p t) 718 (eof-value nil) 719 (recursive-p nil)) 720 #!+sb-doc 721 "Read from STREAM and return the value read, preserving any whitespace 722 that followed the object." 723 (declare (explicit-check)) 724 (check-for-recursive-read stream recursive-p 'read-preserving-whitespace) 725 (%read-preserving-whitespace stream eof-error-p eof-value recursive-p)) 726 727;;; Read from STREAM given starting CHAR, returning 1 and the resulting 728;;; object, unless CHAR is a macro yielding no value, then 0 and NIL, 729;;; for functions that want comments to return so that they can look 730;;; past them. CHAR must not be whitespace. 731(defun read-maybe-nothing (stream char) 732 (truly-the 733 (values bit t) ; avoid a type-check. M-V-CALL is lame 734 (multiple-value-call 735 (lambda (stream start-pos &optional (result nil supplied-p) &rest junk) 736 (declare (ignore junk)) ; is this ANSI-specified? 737 (when (and supplied-p start-pos) 738 (funcall (form-tracking-stream-observer stream) 739 start-pos 740 (form-tracking-stream-input-char-pos stream) result)) 741 (values (if supplied-p 1 0) result)) 742 ;; KLUDGE: not capturing anything in the lambda avoids closure consing 743 stream 744 (and (form-tracking-stream-p stream) 745 ;; Subtract 1 because the position points _after_ CHAR. 746 (1- (form-tracking-stream-input-char-pos stream))) 747 (funcall (!cmt-entry-to-function 748 (get-raw-cmt-entry char *readtable*) #'read-token) 749 stream char)))) 750 751(defun read (&optional (stream *standard-input*) 752 (eof-error-p t) 753 (eof-value nil) 754 (recursive-p nil)) 755 #!+sb-doc 756 "Read the next Lisp value from STREAM, and return it." 757 (declare (explicit-check)) 758 (check-for-recursive-read stream recursive-p 'read) 759 (let* ((local-eof-val (load-time-value (cons nil nil) t)) 760 (result (%read-preserving-whitespace 761 stream eof-error-p local-eof-val recursive-p))) 762 ;; This function generally discards trailing whitespace. If you 763 ;; don't want to discard trailing whitespace, call 764 ;; CL:READ-PRESERVING-WHITESPACE instead. 765 (unless (or (eql result local-eof-val) recursive-p) 766 (let ((next-char (read-char stream nil +EOF+))) 767 (unless (or (eq next-char +EOF+) 768 (whitespace[2]p next-char)) 769 (unread-char next-char stream)))) 770 (if (eq result local-eof-val) eof-value result))) 771 772 773;;;; basic readmacro definitions 774;;;; 775;;;; Some large, hairy subsets of readmacro definitions (backquotes 776;;;; and sharp macros) are not here, but in their own source files. 777 778(defun read-quote (stream ignore) 779 (declare (ignore ignore)) 780 (list 'quote (read stream t nil t))) 781 782(defun read-comment (stream ignore) 783 (declare (ignore ignore)) 784 (handler-bind 785 ((character-decoding-error 786 #'(lambda (decoding-error) 787 (declare (ignorable decoding-error)) 788 (style-warn 789 'sb!kernel::character-decoding-error-in-macro-char-comment 790 :position (file-position stream) :stream stream) 791 (invoke-restart 'attempt-resync)))) 792 (let ((stream (in-synonym-of stream))) 793 (if (ansi-stream-p stream) 794 (prepare-for-fast-read-char stream 795 (loop (let ((char (fast-read-char nil +EOF+))) 796 (when (or (eq char +EOF+) (char= char #\newline)) 797 (return (done-with-fast-read-char)))))) 798 ;; CLOS stream 799 (loop (let ((char (read-char stream nil +EOF+))) 800 (when (or (eq char +EOF+) (char= char #\newline)) 801 (return))))))) 802 ;; Don't return anything. 803 (values)) 804 805;;; FIXME: for these two macro chars, if STREAM is a FORM-TRACKING-STREAM, 806;;; every cons cell should generate a notification so that the readtable 807;;; manipulation in SB-COVER can be eliminated in favor of a stream observer. 808;;; It is cheap to add events- it won't increase consing in the compiler 809;;; because it the extra events can simply be ignored. 810(macrolet 811 ((with-list-reader ((streamvar delimiter) &body body) 812 `(let* ((thelist (list nil)) 813 (listtail thelist) 814 (collectp (if *read-suppress* 0 -1))) 815 (declare (dynamic-extent thelist)) 816 (loop (let ((firstchar (flush-whitespace ,streamvar))) 817 (when (eq firstchar ,delimiter) 818 (return (cdr thelist))) 819 ,@body)))) 820 (read-list-item (streamvar) 821 `(multiple-value-bind (winp obj) 822 (read-maybe-nothing ,streamvar firstchar) 823 ;; allow for a character macro return to return nothing 824 (unless (zerop (logand winp collectp)) 825 (setq listtail 826 (cdr (rplacd (truly-the cons listtail) (list obj)))))))) 827 828 ;;; The character macro handler for left paren 829 (defun read-list (stream ignore) 830 (declare (ignore ignore)) 831 (with-list-reader (stream #\)) 832 (when (eq firstchar #\.) 833 (let ((nextchar (read-char stream t))) 834 (cond ((token-delimiterp nextchar) 835 (cond ((eq listtail thelist) 836 (unless (zerop collectp) 837 (simple-reader-error 838 stream "Nothing appears before . in list."))) 839 ((whitespace[2]p nextchar) 840 (setq nextchar (flush-whitespace stream)))) 841 (rplacd (truly-the cons listtail) 842 (read-after-dot stream nextchar collectp)) 843 ;; Check for improper ". ,@" or ". ,." now rather than 844 ;; in the #\` reader. The resulting QUASIQUOTE macro might 845 ;; never be exapanded, but nonetheless could be erroneous. 846 (unless (zerop (logand *backquote-depth* collectp)) 847 (let ((lastcdr (cdr (last listtail)))) 848 (when (and (comma-p lastcdr) (comma-splicing-p lastcdr)) 849 (simple-reader-error 850 stream "~S contains a splicing comma after a dot" 851 (cdr thelist))))) 852 (return (cdr thelist))) 853 ;; Put back NEXTCHAR so that we can read it normally. 854 (t (unread-char nextchar stream))))) 855 ;; Next thing is not an isolated dot. 856 (read-list-item stream))) 857 858 ;;; (This is a COMMON-LISP exported symbol.) 859 (defun read-delimited-list (endchar &optional 860 (input-stream *standard-input*) 861 recursive-p) 862 #!+sb-doc 863 "Read Lisp values from INPUT-STREAM until the next character after a 864 value's representation is ENDCHAR, and return the objects as a list." 865 (declare (explicit-check)) 866 (check-for-recursive-read input-stream recursive-p 'read-delimited-list) 867 (flet ((%read-delimited-list () 868 (with-list-reader (input-stream endchar) 869 (read-list-item input-stream)))) 870 (if recursive-p 871 (%read-delimited-list) 872 (with-read-buffer () (%read-delimited-list)))))) ; end MACROLET 873 874(defun read-after-dot (stream firstchar collectp) 875 ;; FIRSTCHAR is non-whitespace! 876 (let ((lastobj ())) 877 (do ((char firstchar (flush-whitespace stream))) 878 ((eq char #\)) 879 (if (zerop collectp) 880 (return-from read-after-dot nil) 881 (simple-reader-error stream "Nothing appears after . in list."))) 882 ;; See whether there's something there. 883 (multiple-value-bind (winp obj) (read-maybe-nothing stream char) 884 (unless (zerop winp) (return (setq lastobj obj))))) 885 ;; At least one thing appears after the dot. 886 ;; Check for more than one thing following dot. 887 (loop 888 (let ((char (flush-whitespace stream))) 889 (cond ((eq char #\)) (return lastobj)) ;success! 890 ;; Try reading virtual whitespace. 891 ((not (zerop (logand (read-maybe-nothing stream char) 892 (truly-the fixnum collectp)))) 893 (simple-reader-error 894 stream "More than one object follows . in list."))))))) 895 896;;; Whether it is permissible to read strings as base-string 897;;; if no extended-chars are present. The system itself prefers this, but 898;;; otherwise it is a contentious issue. We don't (by default) use base-strings, 899;;; so that people can dubiously write: 900;;; (SETF (CHAR (READ-STRING S) 0) #\PILE_OF_POO), 901;;; which is stupid because it makes an assumption about what READ does. 902#!+sb-unicode 903(defvar *read-prefer-base-string* t) 904(eval-when (:compile-toplevel :execute) 905 (sb!xc:defmacro token-elt-type (flag) 906 (declare (ignorable flag)) 907 `(if (and ,flag #!+sb-unicode *read-prefer-base-string*) 908 'base-char 909 'character))) 910 911(defun read-string (stream closech) 912 ;; This accumulates chars until it sees same char that invoked it. 913 ;; We avoid copying any given input character more than twice- 914 ;; once to a temp buffer and then to the result. In the worst case, 915 ;; we can waste space equal the unwasted space, if the final character 916 ;; causes allocation of a new buffer for just that character, 917 ;; because the buffer size is doubled each time it overflows. 918 ;; (Would be better to peek at the frc-buffer if the stream has one.) 919 ;; Scratch vectors are GC-able as soon as this function returns though. 920 (declare (character closech)) 921 (macrolet ((scan (read-a-char eofp &optional finish) 922 `(loop (let ((char ,read-a-char)) 923 (declare (optimize (sb!c::insert-array-bounds-checks 0))) 924 (cond (,eofp (error 'end-of-file :stream stream)) 925 ((eql char closech) 926 (return ,finish)) 927 ((single-escape-p char rt) 928 (setq char ,read-a-char) 929 (when ,eofp 930 (error 'end-of-file :stream stream)))) 931 (when (>= ptr lim) 932 (unless suppress 933 (push buf chain) 934 (setq lim (the index (ash lim 1)) 935 buf (make-array lim :element-type 'character))) 936 (setq ptr 0)) 937 (setf (schar buf ptr) (truly-the character char)) 938 #!+sb-unicode ; BASE-CHAR-P does not exist if not 939 (unless (base-char-p char) (setq only-base-chars nil)) 940 (incf ptr))))) 941 (let* ((token-buf *read-buffer*) 942 (buf (token-buf-string token-buf)) 943 (rt *readtable*) 944 (stream (in-synonym-of stream)) 945 (suppress *read-suppress*) 946 (lim (length buf)) 947 (ptr 0) 948 (only-base-chars t) 949 (chain)) 950 (declare (type (simple-array character (*)) buf)) 951 (reset-read-buffer token-buf) 952 (if (ansi-stream-p stream) 953 (prepare-for-fast-read-char stream 954 (scan (fast-read-char t) nil (done-with-fast-read-char))) 955 ;; CLOS stream 956 (scan (read-char stream nil +EOF+) (eq char +EOF+))) 957 (if suppress 958 "" 959 (let* ((sum (loop for buf in chain sum (length buf))) 960 (result 961 (make-array (+ sum ptr) 962 :element-type (token-elt-type only-base-chars)))) 963 (setq ptr sum) 964 ;; Now work backwards from the end 965 (replace result buf :start1 ptr) 966 (dolist (buf chain result) 967 (declare (type (simple-array character (*)) buf)) 968 (let ((len (length buf))) 969 (decf ptr len) 970 (replace result buf :start1 ptr)))))))) 971 972(defun read-right-paren (stream ignore) 973 (declare (ignore ignore)) 974 (simple-reader-error stream "unmatched close parenthesis")) 975 976;;; Read from the stream up to the next delimiter. Leave the resulting 977;;; token in *READ-BUFFER*, and return three values: 978;;; -- a TOKEN-BUF 979;;; -- whether any escape character was seen (even if no character is escaped) 980;;; -- whether a package delimiter character was seen 981;;; Normalizes the input to NFKC before returning 982(defun internal-read-extended-token (stream firstchar escape-firstchar 983 &aux (read-buffer *read-buffer*)) 984 (reset-read-buffer read-buffer) 985 (when escape-firstchar 986 (ouch-read-buffer-escaped firstchar read-buffer) 987 (setq firstchar (read-char stream nil +EOF+))) 988 (do ((char firstchar (read-char stream nil +EOF+)) 989 (seen-multiple-escapes nil) 990 (rt *readtable*) 991 (colon nil)) 992 ((cond ((eq char +EOF+) t) 993 ((token-delimiterp char rt) 994 (unread-char char stream) 995 t) 996 (t nil)) 997 (progn 998 (multiple-value-setq (read-buffer colon) 999 (normalize-read-buffer read-buffer colon)) 1000 (values read-buffer 1001 (or (plusp (fill-pointer (token-buf-escapes read-buffer))) 1002 seen-multiple-escapes) 1003 colon))) 1004 (flet ((escape-1-char () 1005 ;; It can't be a number, even if it's 1\23. 1006 ;; Read next char here, so it won't be casified. 1007 (let ((nextchar (read-char stream nil +EOF+))) 1008 (if (eq nextchar +EOF+) 1009 (reader-eof-error stream "after escape character") 1010 (ouch-read-buffer-escaped nextchar read-buffer))))) 1011 (cond ((single-escape-p char rt) (escape-1-char)) 1012 ((multiple-escape-p char rt) 1013 (setq seen-multiple-escapes t) 1014 ;; Read to next multiple-escape, escaping single chars 1015 ;; along the way. 1016 (loop 1017 (let ((ch (read-char stream nil +EOF+))) 1018 (cond ((eq ch +EOF+) 1019 (reader-eof-error stream "inside extended token")) 1020 ((multiple-escape-p ch rt) (return)) 1021 ((single-escape-p ch rt) (escape-1-char)) 1022 (t (ouch-read-buffer-escaped ch read-buffer)))))) 1023 (t 1024 (when (and (not colon) ; easiest test first 1025 (constituentp char rt) 1026 (eql (get-constituent-trait char) 1027 +char-attr-package-delimiter+)) 1028 (setq colon t)) 1029 (ouch-read-buffer char read-buffer)))))) 1030 1031;;;; character classes 1032 1033;;; Return the character class for CHAR. 1034;;; 1035;;; FIXME: why aren't these ATT-getting forms using GET-CAT-ENTRY? 1036;;; Because we've cached the readtable tables? 1037(defmacro char-class (char attarray atthash) 1038 `(let ((att (if (typep (truly-the character ,char) 'base-char) 1039 (aref ,attarray (char-code ,char)) 1040 (gethash ,char ,atthash +char-attr-constituent+)))) 1041 (declare (fixnum att)) 1042 (cond 1043 ((<= att +char-attr-terminating-macro+) +char-attr-delimiter+) 1044 ((< att +char-attr-constituent+) att) 1045 (t (setf att (get-constituent-trait ,char)) 1046 (if (= att +char-attr-invalid+) 1047 (simple-reader-error stream "invalid constituent") 1048 att))))) 1049 1050;;; Return the character class for CHAR, which might be part of a 1051;;; rational number. 1052(defmacro char-class2 (char attarray atthash read-base) 1053 `(let ((att (if (typep (truly-the character ,char) 'base-char) 1054 (aref ,attarray (char-code ,char)) 1055 (gethash ,char ,atthash +char-attr-constituent+)))) 1056 (declare (fixnum att)) 1057 (cond 1058 ((<= att +char-attr-terminating-macro+) +char-attr-delimiter+) 1059 ((< att +char-attr-constituent+) att) 1060 (t (setf att (get-constituent-trait ,char)) 1061 (cond 1062 ((digit-char-p ,char ,read-base) +char-attr-constituent-digit+) 1063 ((= att +char-attr-constituent-digit+) +char-attr-constituent+) 1064 ((= att +char-attr-invalid+) 1065 (simple-reader-error stream "invalid constituent")) 1066 (t att)))))) 1067 1068;;; Return the character class for a char which might be part of a 1069;;; rational or floating number. (Assume that it is a digit if it 1070;;; could be.) 1071(defmacro char-class3 (char attarray atthash read-base) 1072 `(let ((att (if (typep (truly-the character ,char) 'base-char) 1073 (aref ,attarray (char-code ,char)) 1074 (gethash ,char ,atthash +char-attr-constituent+)))) 1075 (declare (fixnum att)) 1076 (cond 1077 ((<= att +char-attr-terminating-macro+) +char-attr-delimiter+) 1078 ((< att +char-attr-constituent+) att) 1079 (t (setf att (get-constituent-trait ,char)) 1080 (when possibly-rational 1081 (setq possibly-rational 1082 (or (digit-char-p ,char ,read-base) 1083 (= att +char-attr-constituent-slash+)))) 1084 (when possibly-float 1085 (setq possibly-float 1086 (or (digit-char-p ,char 10) 1087 (= att +char-attr-constituent-dot+)))) 1088 (cond 1089 ((digit-char-p ,char (max ,read-base 10)) 1090 (if (digit-char-p ,char ,read-base) 1091 (if (= att +char-attr-constituent-expt+) 1092 +char-attr-constituent-digit-or-expt+ 1093 +char-attr-constituent-digit+) 1094 +char-attr-constituent-decimal-digit+)) 1095 ((= att +char-attr-invalid+) 1096 (simple-reader-error stream "invalid constituent")) 1097 (t att)))))) 1098 1099;;;; token fetching 1100 1101(defvar *read-suppress* nil 1102 #!+sb-doc 1103 "Suppress most interpreting in the reader when T.") 1104 1105(defvar *read-base* 10 1106 #!+sb-doc 1107 "the radix that Lisp reads numbers in") 1108(declaim (type (integer 2 36) *read-base*)) 1109 1110;;; Normalize TOKEN-BUF to NFKC, returning a new TOKEN-BUF and the 1111;;; COLON value 1112(defun normalize-read-buffer (token-buf &optional colon) 1113 (when (or (token-buf-only-base-chars token-buf) 1114 (not (readtable-normalization *readtable*))) 1115 (return-from normalize-read-buffer (values token-buf colon))) 1116 (let ((current-buffer (copy-token-buf-string token-buf)) 1117 (old-escapes (copy-seq (token-buf-escapes token-buf))) 1118 (str-to-normalize (make-string (token-buf-fill-ptr token-buf))) 1119 (normalize-ptr 0) (escapes-ptr 0)) 1120 (reset-read-buffer token-buf) 1121 (macrolet ((clear-str-to-normalize () 1122 `(progn 1123 (loop for char across (sb!unicode:normalize-string 1124 (subseq str-to-normalize 0 normalize-ptr) 1125 :nfkc) do 1126 (ouch-read-buffer char token-buf)) 1127 (setf normalize-ptr 0))) 1128 (push-to-normalize (ch) 1129 (let ((ch-gen (gensym))) 1130 `(let ((,ch-gen ,ch)) 1131 (setf (char str-to-normalize normalize-ptr) ,ch-gen) 1132 (incf normalize-ptr))))) 1133 (loop for c across current-buffer 1134 for i from 0 1135 do 1136 (if (and (< escapes-ptr (length old-escapes)) 1137 (eql i (aref old-escapes escapes-ptr))) 1138 (progn 1139 (clear-str-to-normalize) 1140 (ouch-read-buffer-escaped c token-buf) 1141 (incf escapes-ptr)) 1142 (push-to-normalize c))) 1143 (clear-str-to-normalize) 1144 (values token-buf colon)))) 1145 1146;;; Modify the read buffer according to READTABLE-CASE, ignoring 1147;;; ESCAPES. ESCAPES is a vector of the escaped indices. 1148(defun casify-read-buffer (token-buf) 1149 (let ((case (readtable-case *readtable*)) 1150 (escapes (token-buf-escapes token-buf))) 1151 (cond 1152 ((and (zerop (length escapes)) (eq case :upcase)) 1153 (let ((buffer (token-buf-string token-buf))) 1154 (dotimes (i (token-buf-fill-ptr token-buf)) 1155 (declare (optimize (sb!c::insert-array-bounds-checks 0))) 1156 (setf (schar buffer i) (char-upcase (schar buffer i)))))) 1157 ((eq case :preserve)) 1158 (t 1159 (macrolet ((skip-esc (&body body) 1160 `(do ((i (1- (token-buf-fill-ptr token-buf)) (1- i)) 1161 (buffer (token-buf-string token-buf)) 1162 (esc (if (zerop (fill-pointer escapes)) 1163 -1 (vector-pop escapes)))) 1164 ((minusp i)) 1165 (declare (fixnum i) 1166 (optimize (sb!c::insert-array-bounds-checks 0))) 1167 (if (< esc i) 1168 (let ((ch (schar buffer i))) 1169 ,@body) 1170 (progn 1171 (aver (= esc i)) 1172 (setq esc (if (zerop (fill-pointer escapes)) 1173 -1 (vector-pop escapes)))))))) 1174 (flet ((lower-em () 1175 (skip-esc (setf (schar buffer i) (char-downcase ch)))) 1176 (raise-em () 1177 (skip-esc (setf (schar buffer i) (char-upcase ch))))) 1178 (ecase case 1179 (:upcase (raise-em)) 1180 (:downcase (lower-em)) 1181 (:invert 1182 (let ((all-upper t) 1183 (all-lower t) 1184 (fillptr (fill-pointer escapes))) 1185 (skip-esc 1186 (when (both-case-p ch) 1187 (if (upper-case-p ch) 1188 (setq all-lower nil) 1189 (setq all-upper nil)))) 1190 (setf (fill-pointer escapes) fillptr) 1191 (cond (all-lower (raise-em)) 1192 (all-upper (lower-em)))))))))))) 1193 1194(eval-when (:compile-toplevel :load-toplevel :execute) 1195 (defvar *reader-package* nil)) 1196(declaim (type (or null package) *reader-package*) 1197 (always-bound *reader-package*)) 1198 1199(defun reader-find-package (package-designator stream) 1200 (if (%instancep package-designator) 1201 package-designator 1202 (let ((package (find-package package-designator))) 1203 (cond (package 1204 ;; Release the token-buf that was used for the designator 1205 (release-token-buf (shiftf (token-buf-next *read-buffer*) nil)) 1206 package) 1207 (t 1208 (error 'simple-reader-package-error 1209 :package package-designator 1210 :stream stream 1211 :format-control "Package ~A does not exist." 1212 :format-arguments (list package-designator))))))) 1213 1214(defun read-token (stream firstchar) 1215 #!+sb-doc 1216 "Default readmacro function. Handles numbers, symbols, and SBCL's 1217extended <package-name>::<form-in-package> syntax." 1218 ;; Check explicitly whether FIRSTCHAR has an entry for 1219 ;; NON-TERMINATING in CHARACTER-ATTRIBUTE-TABLE and 1220 ;; READ-DOT-NUMBER-SYMBOL in CMT. Report an error if these are 1221 ;; violated. (If we called this, we want something that is a 1222 ;; legitimate token!) Read in the longest possible string satisfying 1223 ;; the Backus-Naur form for "unqualified-token". Leave the result in 1224 ;; the *READ-BUFFER*. Return next char after token (last char read). 1225 (when *read-suppress* 1226 (internal-read-extended-token stream firstchar nil) 1227 (return-from read-token nil)) 1228 (let* ((rt *readtable*) 1229 (base *read-base*) 1230 (attribute-array (character-attribute-array rt)) 1231 (attribute-hash-table (character-attribute-hash-table rt)) 1232 (buf *read-buffer*) 1233 (package-designator nil) 1234 (colons 0) 1235 (possibly-rational t) 1236 (seen-digit-or-expt nil) 1237 (possibly-float t) 1238 (was-possibly-float nil) 1239 (seen-multiple-escapes nil)) 1240 (declare (token-buf buf)) 1241 (reset-read-buffer buf) 1242 (macrolet ((getchar-or-else (what) 1243 `(when (eq (setq char (read-char stream nil +EOF+)) +EOF+) 1244 ,what))) 1245 (prog ((char firstchar)) 1246 (case (char-class3 char attribute-array attribute-hash-table base) 1247 (#.+char-attr-constituent-sign+ (go SIGN)) 1248 (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) 1249 (#.+char-attr-constituent-digit-or-expt+ 1250 (setq seen-digit-or-expt t) 1251 (go LEFTDIGIT)) 1252 (#.+char-attr-constituent-decimal-digit+ (go LEFTDECIMALDIGIT)) 1253 (#.+char-attr-constituent-dot+ (go FRONTDOT)) 1254 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1255 (#.+char-attr-package-delimiter+ (go COLON)) 1256 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1257 (#.+char-attr-invalid+ (simple-reader-error stream 1258 "invalid constituent")) 1259 ;; can't have eof, whitespace, or terminating macro as first char! 1260 (t (go SYMBOL))) 1261 SIGN ; saw "sign" 1262 (ouch-read-buffer char buf) 1263 (getchar-or-else (go RETURN-SYMBOL)) 1264 (setq possibly-rational t 1265 possibly-float t) 1266 (case (char-class3 char attribute-array attribute-hash-table base) 1267 (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) 1268 (#.+char-attr-constituent-digit-or-expt+ 1269 (setq seen-digit-or-expt t) 1270 (go LEFTDIGIT)) 1271 (#.+char-attr-constituent-decimal-digit+ (go LEFTDECIMALDIGIT)) 1272 (#.+char-attr-constituent-dot+ (go SIGNDOT)) 1273 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1274 (#.+char-attr-package-delimiter+ (go COLON)) 1275 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1276 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1277 (t (go SYMBOL))) 1278 LEFTDIGIT ; saw "[sign] {digit}+" 1279 (ouch-read-buffer char buf) 1280 (getchar-or-else (return (make-integer))) 1281 (setq was-possibly-float possibly-float) 1282 (case (char-class3 char attribute-array attribute-hash-table base) 1283 (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) 1284 (#.+char-attr-constituent-decimal-digit+ (if possibly-float 1285 (go LEFTDECIMALDIGIT) 1286 (go SYMBOL))) 1287 (#.+char-attr-constituent-dot+ (if possibly-float 1288 (go MIDDLEDOT) 1289 (go SYMBOL))) 1290 (#.+char-attr-constituent-digit-or-expt+ 1291 (if (or seen-digit-or-expt (not was-possibly-float)) 1292 (progn (setq seen-digit-or-expt t) (go LEFTDIGIT)) 1293 (progn (setq seen-digit-or-expt t) (go LEFTDIGIT-OR-EXPT)))) 1294 (#.+char-attr-constituent-expt+ 1295 (if was-possibly-float 1296 (go EXPONENT) 1297 (go SYMBOL))) 1298 (#.+char-attr-constituent-slash+ (if possibly-rational 1299 (go RATIO) 1300 (go SYMBOL))) 1301 (#.+char-attr-delimiter+ (unread-char char stream) 1302 (return (make-integer))) 1303 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1304 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1305 (#.+char-attr-package-delimiter+ (go COLON)) 1306 (t (go SYMBOL))) 1307 LEFTDIGIT-OR-EXPT 1308 (ouch-read-buffer char buf) 1309 (getchar-or-else (return (make-integer))) 1310 (case (char-class3 char attribute-array attribute-hash-table base) 1311 (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) 1312 (#.+char-attr-constituent-decimal-digit+ (bug "impossible!")) 1313 (#.+char-attr-constituent-dot+ (go SYMBOL)) 1314 (#.+char-attr-constituent-digit-or-expt+ (go LEFTDIGIT)) 1315 (#.+char-attr-constituent-expt+ (go SYMBOL)) 1316 (#.+char-attr-constituent-sign+ (go EXPTSIGN)) 1317 (#.+char-attr-constituent-slash+ (if possibly-rational 1318 (go RATIO) 1319 (go SYMBOL))) 1320 (#.+char-attr-delimiter+ (unread-char char stream) 1321 (return (make-integer))) 1322 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1323 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1324 (#.+char-attr-package-delimiter+ (go COLON)) 1325 (t (go SYMBOL))) 1326 LEFTDECIMALDIGIT ; saw "[sign] {decimal-digit}+" 1327 (aver possibly-float) 1328 (ouch-read-buffer char buf) 1329 (getchar-or-else (go RETURN-SYMBOL)) 1330 (case (char-class char attribute-array attribute-hash-table) 1331 (#.+char-attr-constituent-digit+ (go LEFTDECIMALDIGIT)) 1332 (#.+char-attr-constituent-dot+ (go MIDDLEDOT)) 1333 (#.+char-attr-constituent-expt+ (go EXPONENT)) 1334 (#.+char-attr-constituent-slash+ (aver (not possibly-rational)) 1335 (go SYMBOL)) 1336 (#.+char-attr-delimiter+ (unread-char char stream) 1337 (go RETURN-SYMBOL)) 1338 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1339 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1340 (#.+char-attr-package-delimiter+ (go COLON)) 1341 (t (go SYMBOL))) 1342 MIDDLEDOT ; saw "[sign] {digit}+ dot" 1343 (ouch-read-buffer char buf) 1344 (getchar-or-else (return (make-integer 10))) 1345 (case (char-class char attribute-array attribute-hash-table) 1346 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) 1347 (#.+char-attr-constituent-expt+ (go EXPONENT)) 1348 (#.+char-attr-delimiter+ 1349 (unread-char char stream) 1350 (return (make-integer 10))) 1351 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1352 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1353 (#.+char-attr-package-delimiter+ (go COLON)) 1354 (t (go SYMBOL))) 1355 RIGHTDIGIT ; saw "[sign] {decimal-digit}* dot {digit}+" 1356 (ouch-read-buffer char buf) 1357 (getchar-or-else (return (make-float stream))) 1358 (case (char-class char attribute-array attribute-hash-table) 1359 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) 1360 (#.+char-attr-constituent-expt+ (go EXPONENT)) 1361 (#.+char-attr-delimiter+ 1362 (unread-char char stream) 1363 (return (make-float stream))) 1364 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1365 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1366 (#.+char-attr-package-delimiter+ (go COLON)) 1367 (t (go SYMBOL))) 1368 SIGNDOT ; saw "[sign] dot" 1369 (ouch-read-buffer char buf) 1370 (getchar-or-else (go RETURN-SYMBOL)) 1371 (case (char-class char attribute-array attribute-hash-table) 1372 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) 1373 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1374 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1375 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1376 (t (go SYMBOL))) 1377 FRONTDOT ; saw "dot" 1378 (ouch-read-buffer char buf) 1379 (getchar-or-else (simple-reader-error stream "dot context error")) 1380 (case (char-class char attribute-array attribute-hash-table) 1381 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) 1382 (#.+char-attr-constituent-dot+ (go DOTS)) 1383 (#.+char-attr-delimiter+ (simple-reader-error stream 1384 "dot context error")) 1385 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1386 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1387 (#.+char-attr-package-delimiter+ (go COLON)) 1388 (t (go SYMBOL))) 1389 EXPONENT 1390 (ouch-read-buffer char buf) 1391 (getchar-or-else (go RETURN-SYMBOL)) 1392 (setq possibly-float t) 1393 (case (char-class char attribute-array attribute-hash-table) 1394 (#.+char-attr-constituent-sign+ (go EXPTSIGN)) 1395 (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) 1396 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1397 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1398 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1399 (#.+char-attr-package-delimiter+ (go COLON)) 1400 (t (go SYMBOL))) 1401 EXPTSIGN ; got to EXPONENT, and saw a sign character 1402 (ouch-read-buffer char buf) 1403 (getchar-or-else (go RETURN-SYMBOL)) 1404 (case (char-class char attribute-array attribute-hash-table) 1405 (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) 1406 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1407 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1408 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1409 (#.+char-attr-package-delimiter+ (go COLON)) 1410 (t (go SYMBOL))) 1411 EXPTDIGIT ; got to EXPONENT, saw "[sign] {digit}+" 1412 (ouch-read-buffer char buf) 1413 (getchar-or-else (return (make-float stream))) 1414 (case (char-class char attribute-array attribute-hash-table) 1415 (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) 1416 (#.+char-attr-delimiter+ 1417 (unread-char char stream) 1418 (return (make-float stream))) 1419 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1420 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1421 (#.+char-attr-package-delimiter+ (go COLON)) 1422 (t (go SYMBOL))) 1423 RATIO ; saw "[sign] {digit}+ slash" 1424 (ouch-read-buffer char buf) 1425 (getchar-or-else (go RETURN-SYMBOL)) 1426 (case (char-class2 char attribute-array attribute-hash-table base) 1427 (#.+char-attr-constituent-digit+ (go RATIODIGIT)) 1428 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1429 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1430 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1431 (#.+char-attr-package-delimiter+ (go COLON)) 1432 (t (go SYMBOL))) 1433 RATIODIGIT ; saw "[sign] {digit}+ slash {digit}+" 1434 (ouch-read-buffer char buf) 1435 (getchar-or-else (return (make-ratio stream))) 1436 (case (char-class2 char attribute-array attribute-hash-table base) 1437 (#.+char-attr-constituent-digit+ (go RATIODIGIT)) 1438 (#.+char-attr-delimiter+ 1439 (unread-char char stream) 1440 (return (make-ratio stream))) 1441 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1442 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1443 (#.+char-attr-package-delimiter+ (go COLON)) 1444 (t (go SYMBOL))) 1445 DOTS ; saw "dot {dot}+" 1446 (ouch-read-buffer char buf) 1447 (getchar-or-else (simple-reader-error stream "too many dots")) 1448 (case (char-class char attribute-array attribute-hash-table) 1449 (#.+char-attr-constituent-dot+ (go DOTS)) 1450 (#.+char-attr-delimiter+ 1451 (unread-char char stream) 1452 (simple-reader-error stream "too many dots")) 1453 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1454 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1455 (#.+char-attr-package-delimiter+ (go COLON)) 1456 (t (go SYMBOL))) 1457 SYMBOL ; not a dot, dots, or number 1458 (let ((stream (in-synonym-of stream))) 1459 (macrolet 1460 ((scan (read-a-char &optional finish) 1461 `(prog () 1462 SYMBOL-LOOP 1463 (ouch-read-buffer char buf) 1464 (setq char ,read-a-char) 1465 (when (eq char +EOF+) (go RETURN-SYMBOL)) 1466 (case (char-class char attribute-array attribute-hash-table) 1467 (#.+char-attr-single-escape+ ,finish (go SINGLE-ESCAPE)) 1468 (#.+char-attr-delimiter+ ,finish 1469 (unread-char char stream) 1470 (go RETURN-SYMBOL)) 1471 (#.+char-attr-multiple-escape+ ,finish (go MULT-ESCAPE)) 1472 (#.+char-attr-package-delimiter+ ,finish (go COLON)) 1473 (t (go SYMBOL-LOOP)))))) 1474 (if (ansi-stream-p stream) 1475 (prepare-for-fast-read-char stream 1476 (scan (fast-read-char nil +EOF+) (done-with-fast-read-char))) 1477 ;; CLOS stream 1478 (scan (read-char stream nil +EOF+))))) 1479 SINGLE-ESCAPE ; saw a single-escape 1480 ;; Don't put the escape character in the read buffer. 1481 ;; READ-NEXT CHAR, put in buffer (no case conversion). 1482 (let ((nextchar (read-char stream nil +EOF+))) 1483 (when (eq nextchar +EOF+) 1484 (reader-eof-error stream "after single-escape character")) 1485 (ouch-read-buffer-escaped nextchar buf)) 1486 (getchar-or-else (go RETURN-SYMBOL)) 1487 (case (char-class char attribute-array attribute-hash-table) 1488 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1489 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1490 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1491 (#.+char-attr-package-delimiter+ (go COLON)) 1492 (t (go SYMBOL))) 1493 MULT-ESCAPE 1494 (setq seen-multiple-escapes t) 1495 ;; sometimes we pass eof-error=nil but check. here we just let it err. 1496 ;; should pick one style and stick with it. 1497 (do ((char (read-char stream t) (read-char stream t))) 1498 ((multiple-escape-p char rt)) 1499 (if (single-escape-p char rt) (setq char (read-char stream t))) 1500 (ouch-read-buffer-escaped char buf)) 1501 (getchar-or-else (go RETURN-SYMBOL)) 1502 (case (char-class char attribute-array attribute-hash-table) 1503 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) 1504 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1505 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1506 (#.+char-attr-package-delimiter+ (go COLON)) 1507 (t (go SYMBOL))) 1508 COLON 1509 (unless (zerop colons) 1510 (simple-reader-error 1511 stream "too many colons in ~S" (copy-token-buf-string buf))) 1512 (setf buf (normalize-read-buffer buf)) 1513 (casify-read-buffer buf) 1514 (setq colons 1) 1515 (setq package-designator 1516 (if (or (plusp (token-buf-fill-ptr buf)) seen-multiple-escapes) 1517 (prog1 (sized-token-buf-string buf) 1518 (let ((new (acquire-token-buf))) 1519 (setf (token-buf-next new) buf ; new points to old 1520 buf new *read-buffer* new))) 1521 *keyword-package*)) 1522 (reset-read-buffer buf) 1523 (getchar-or-else (reader-eof-error stream "after reading a colon")) 1524 (case (char-class char attribute-array attribute-hash-table) 1525 (#.+char-attr-delimiter+ 1526 (unread-char char stream) 1527 (simple-reader-error stream 1528 "illegal terminating character after a colon: ~S" 1529 char)) 1530 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1531 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1532 (#.+char-attr-package-delimiter+ (go INTERN)) 1533 (t (go SYMBOL))) 1534 INTERN 1535 (setq colons 2) 1536 (getchar-or-else (reader-eof-error stream "after reading a colon")) 1537 (case (char-class char attribute-array attribute-hash-table) 1538 (#.+char-attr-delimiter+ 1539 (unread-char char stream) 1540 (if package-designator 1541 (let* ((*reader-package* 1542 (reader-find-package package-designator stream))) 1543 (return (read stream t nil t))) 1544 (simple-reader-error stream 1545 "illegal terminating character after a double-colon: ~S" 1546 char))) 1547 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE)) 1548 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) 1549 (#.+char-attr-package-delimiter+ 1550 (simple-reader-error stream 1551 "too many colons after ~S name" 1552 package-designator)) 1553 (t (go SYMBOL))) 1554 RETURN-SYMBOL 1555 (setf buf (normalize-read-buffer buf)) 1556 (casify-read-buffer buf) 1557 (let* ((pkg (if package-designator 1558 (reader-find-package package-designator stream) 1559 (or *reader-package* (sane-package)))) 1560 (intern-p (or (/= colons 1) (eq pkg *keyword-package*)))) 1561 (unless intern-p ; Try %FIND-SYMBOL 1562 (multiple-value-bind (symbol accessibility) 1563 (%find-symbol (token-buf-string buf) (token-buf-fill-ptr buf) pkg) 1564 (when (eq accessibility :external) (return symbol)) 1565 (with-simple-restart (continue "Use symbol anyway.") 1566 (error 'simple-reader-package-error 1567 :package pkg 1568 :stream stream 1569 :format-arguments 1570 (list (copy-token-buf-string buf) (package-name pkg)) 1571 :format-control 1572 (if accessibility 1573 "The symbol ~S is not external in the ~A package." 1574 "Symbol ~S not found in the ~A package."))))) 1575 (return (%intern (token-buf-string buf) 1576 (token-buf-fill-ptr buf) 1577 pkg 1578 (token-elt-type (token-buf-only-base-chars buf))))))))) 1579 1580;;; For semi-external use: Return 3 values: the token-buf, 1581;;; a flag for whether there was an escape char, and the position of 1582;;; any package delimiter. The returned token-buf is not case-converted. 1583(defun read-extended-token (stream) 1584 ;; recursive-p = T is basically irrelevant. 1585 (let ((first-char (read-char stream nil +EOF+ t))) 1586 (if (neq first-char +EOF+) 1587 (internal-read-extended-token stream first-char nil) 1588 (values (reset-read-buffer *read-buffer*) nil nil)))) 1589 1590;;; for semi-external use: 1591;;; 1592;;; Read an extended token with the first character escaped. Return 1593;;; the token-buf. The returned token-buf is not case-converted. 1594(defun read-extended-token-escaped (stream) 1595 (let ((first-char (read-char stream nil +EOF+))) 1596 (if (neq first-char +EOF+) 1597 (values (internal-read-extended-token stream first-char t)) 1598 (reader-eof-error stream "after escape")))) 1599 1600;;;; number-reading functions 1601 1602;; Mapping of read-base to the max input characters in a positive fixnum. 1603(eval-when (:compile-toplevel :execute) 1604 (defun integer-reader-safe-digits () 1605 (do ((a (make-array 35 :element-type '(unsigned-byte 8))) 1606 (base 2 (1+ base))) 1607 ((> base 36) a) 1608 (do ((total (1- base) (+ (* total base) (1- base))) 1609 (n-digits 0 (1+ n-digits))) 1610 ((sb!xc:typep total 'bignum) 1611 (setf (aref a (- base 2)) n-digits)) 1612 ;; empty DO body 1613 ))) 1614 1615 ;; self-test 1616 (do ((maxdigits (integer-reader-safe-digits)) 1617 (base 2 (1+ base))) 1618 ((> base 36)) 1619 (let* ((n-digits (aref maxdigits (- base 2))) 1620 (d (char (write-to-string (1- base) :base base) 0)) 1621 (string (make-string (1+ n-digits) :initial-element d))) ; 1 extra 1622 (assert (not (typep (parse-integer string :radix base) 1623 `(unsigned-byte ,sb!vm:n-positive-fixnum-bits)))) 1624 (assert (typep (parse-integer string :end n-digits :radix base) 1625 `(unsigned-byte ,sb!vm:n-positive-fixnum-bits)))))) 1626 1627(defmacro !setq-optional-leading-sign (sign-flag token-buf rewind) 1628 ;; guaranteed to have at least one character in buffer at the start 1629 ;; or immediately following [ESFDL] marker depending on 'rewind' flag. 1630 `(locally (declare (optimize (sb!c::insert-array-bounds-checks 0))) 1631 (,(if rewind 'setf 'incf) 1632 (token-buf-cursor ,token-buf) 1633 (case (elt (token-buf-string ,token-buf) 1634 ,(if rewind 0 `(token-buf-cursor ,token-buf))) 1635 (#\- (setq ,sign-flag t) 1) 1636 (#\+ 1) 1637 (t 0))))) 1638 1639(defun make-integer (&optional (base *read-base*)) 1640 #!+sb-doc 1641 "Minimizes bignum-fixnum multiplies by reading a 'safe' number of digits, 1642 then multiplying by a power of the base and adding." 1643 (declare ((integer 2 36) base) 1644 (inline token-buf-getchar)) ; makes for smaller code 1645 (let* ((fixnum-max-digits 1646 (macrolet ((maxdigits () 1647 (!coerce-to-specialized (integer-reader-safe-digits) 1648 '(unsigned-byte 8)))) 1649 (aref (maxdigits) (- base 2)))) 1650 (base-power 1651 (macrolet ((base-powers () 1652 (do ((maxdigits (integer-reader-safe-digits)) 1653 (a (make-array 35)) 1654 (base 2 (1+ base))) 1655 ((> base 36) a) 1656 (setf (aref a (- base 2)) 1657 (expt base (aref maxdigits (- base 2))))))) 1658 (truly-the integer (aref (base-powers) (- base 2))))) 1659 (negativep nil) 1660 (result 0) 1661 (buf *read-buffer*)) 1662 (!setq-optional-leading-sign negativep buf t) 1663 (loop 1664 (let ((acc 0)) 1665 (declare (type (and fixnum unsigned-byte) acc)) 1666 (dotimes (digit-count fixnum-max-digits) 1667 (let ((ch (token-buf-getchar buf))) 1668 (if (or (not ch) (eql ch #\.)) 1669 (return-from make-integer 1670 (let ((result 1671 (if (zerop result) acc 1672 (+ (* result (expt base digit-count)) acc)))) 1673 (if negativep (- result) result))) 1674 (setq acc (truly-the fixnum 1675 (+ (digit-char-p ch base) 1676 (truly-the fixnum (* acc base)))))))) 1677 (setq result (+ (* result base-power) acc)))))) 1678 1679(defun truncate-exponent (exponent number divisor) 1680 #!+sb-doc 1681 "Truncate exponent if it's too large for a float" 1682 ;; Work with base-2 logarithms to avoid conversions to floats, 1683 ;; and convert to base-10 conservatively at the end. 1684 ;; Use the least positive float, because denormalized exponent 1685 ;; can be larger than normalized. 1686 (let* ((max-exponent 1687 #!-long-float 1688 (+ sb!vm:double-float-digits sb!vm:double-float-bias)) 1689 (number-magnitude (integer-length number)) 1690 (divisor-magnitude (1- (integer-length divisor))) 1691 (magnitude (- number-magnitude divisor-magnitude))) 1692 (if (minusp exponent) 1693 (max exponent (ceiling (- (+ max-exponent magnitude)) 1694 #.(floor (log 10 2)))) 1695 (min exponent (floor (- max-exponent magnitude) 1696 #.(floor (log 10 2))))))) 1697 1698(defun make-float (stream) 1699 ;; Assume that the contents of *read-buffer* are a legal float, with nothing 1700 ;; else after it. 1701 (let ((buf *read-buffer*) 1702 (negative-fraction nil) 1703 (number 0) 1704 (divisor 1) 1705 (negative-exponent nil) 1706 (exponent 0) 1707 (float-char ()) 1708 char) 1709 (!setq-optional-leading-sign negative-fraction buf t) 1710 ;; Read digits before the dot. 1711 (macrolet ((accumulate (expr) 1712 `(let (digit) 1713 (loop (if (and (setq char (token-buf-getchar buf)) 1714 (setq digit (digit-char-p char))) 1715 ,expr 1716 (return)))))) 1717 (accumulate (setq number (+ (* number 10) digit))) 1718 ;; Deal with the dot, if it's there. 1719 (when (char= char #\.) 1720 ;; Read digits after the dot. 1721 (accumulate (setq divisor (* divisor 10) 1722 number (+ (* number 10) digit)))) 1723 ;; Is there an exponent letter? 1724 (cond 1725 ((null char) 1726 ;; If not, we've read the whole number. 1727 (let ((num (make-float-aux number divisor 1728 *read-default-float-format* 1729 stream))) 1730 (return-from make-float (if negative-fraction (- num) num)))) 1731 ((= (get-constituent-trait char) +char-attr-constituent-expt+) 1732 (setq float-char char) 1733 ;; Check leading sign. 1734 (!setq-optional-leading-sign negative-exponent buf nil) 1735 ;; Read digits for exponent. 1736 (accumulate (setq exponent (+ (* exponent 10) digit))) 1737 (setq exponent (if negative-exponent (- exponent) exponent)) 1738 ;; Generate and return the float, depending on FLOAT-CHAR: 1739 (let* ((float-format (case (char-upcase float-char) 1740 (#\E *read-default-float-format*) 1741 (#\S 'short-float) 1742 (#\F 'single-float) 1743 (#\D 'double-float) 1744 (#\L 'long-float))) 1745 (exponent (truncate-exponent exponent number divisor)) 1746 (result (make-float-aux (* (expt 10 exponent) number) 1747 divisor float-format stream))) 1748 (return-from make-float 1749 (if negative-fraction (- result) result)))) 1750 (t (bug "bad fallthrough in floating point reader")))))) 1751 1752(defun make-float-aux (number divisor float-format stream) 1753 (handler-case 1754 (coerce (/ number divisor) float-format) 1755 (type-error (c) 1756 (error 'reader-impossible-number-error 1757 :error c :stream stream 1758 :format-control "failed to build float from ~a" 1759 :format-arguments (list (copy-token-buf-string *read-buffer*)))))) 1760 1761(defun make-ratio (stream) 1762 ;; Assume *READ-BUFFER* contains a legal ratio. Build the number from 1763 ;; the string. 1764 ;; This code is inferior to that of MAKE-INTEGER because it makes no 1765 ;; attempt to perform as few bignum multiplies as possible. 1766 ;; 1767 (let ((numerator 0) (denominator 0) (negativep nil) 1768 (base *read-base*) (buf *read-buffer*)) 1769 (!setq-optional-leading-sign negativep buf t) 1770 ;; Get numerator. 1771 (loop (let ((dig (digit-char-p (token-buf-getchar buf) base))) 1772 (if dig 1773 (setq numerator (+ (* numerator base) dig)) 1774 (return)))) 1775 ;; Get denominator. 1776 (do* ((ch (token-buf-getchar buf) (token-buf-getchar buf)) 1777 (dig ())) 1778 ((or (null ch) (not (setq dig (digit-char-p ch base))))) 1779 (setq denominator (+ (* denominator base) dig))) 1780 (let ((num (handler-case 1781 (/ numerator denominator) 1782 (arithmetic-error (c) 1783 (error 'reader-impossible-number-error 1784 :error c :stream stream 1785 :format-control "failed to build ratio"))))) 1786 (if negativep (- num) num)))) 1787 1788;;;; General reader for dispatch macros 1789 1790(defun dispatch-char-error (stream sub-char ignore) 1791 (declare (optimize allow-non-returning-tail-call)) 1792 (declare (ignore ignore)) 1793 (if *read-suppress* 1794 (values) 1795 (simple-reader-error stream 1796 "no dispatch function defined for ~S" 1797 sub-char))) 1798 1799(defun read-dispatch-char (stream dispatch-table) 1800 ;; Read some digits. 1801 (let ((numargp nil) 1802 (numarg 0) 1803 (sub-char ())) 1804 (loop 1805 (let ((ch (read-char stream nil +EOF+))) 1806 (if (eq ch +EOF+) 1807 (reader-eof-error stream "inside dispatch character") 1808 ;; Take care of the extra char. 1809 (let ((dig (digit-char-p ch))) 1810 (if dig 1811 (setq numargp t numarg (+ (* numarg 10) dig)) 1812 (return (setq sub-char (char-upcase ch)))))))) 1813 ;; Look up the function and call it. 1814 (let ((fn (get-raw-cmt-dispatch-entry sub-char dispatch-table))) 1815 (funcall (!cmt-entry-to-function fn #'dispatch-char-error) 1816 stream sub-char (if numargp numarg nil))))) 1817 1818;;;; READ-FROM-STRING 1819 1820(declaim (ftype (sfunction (string t t index (or null index) t) (values t index)) 1821 %read-from-string)) 1822(defun %read-from-string (string eof-error-p eof-value start end preserve-whitespace) 1823 (with-array-data ((string string :offset-var offset) 1824 (start start) 1825 (end end) 1826 :check-fill-pointer t) 1827 (let ((stream (make-string-input-stream string start end))) 1828 (values (if preserve-whitespace 1829 (%read-preserving-whitespace stream eof-error-p eof-value nil) 1830 (read stream eof-error-p eof-value)) 1831 (- (string-input-stream-current stream) offset))))) 1832 1833(locally 1834(declare (muffle-conditions style-warning)) 1835(defun read-from-string (string &optional (eof-error-p t) eof-value 1836 &key (start 0) end preserve-whitespace) 1837 #!+sb-doc 1838 "The characters of string are successively given to the lisp reader 1839 and the lisp object built by the reader is returned. Macro chars 1840 will take effect." 1841 (declare (string string)) 1842 (maybe-note-read-from-string-signature-issue eof-error-p) 1843 (%read-from-string string eof-error-p eof-value start end preserve-whitespace))) 1844 1845;;;; PARSE-INTEGER 1846 1847(defun parse-integer (string &key (start 0) end (radix 10) junk-allowed) 1848 #!+sb-doc 1849 "Examine the substring of string delimited by start and end 1850 (default to the beginning and end of the string) It skips over 1851 whitespace characters and then tries to parse an integer. The 1852 radix parameter must be between 2 and 36." 1853 (flet ((parse-error (format-control) 1854 (declare (optimize allow-non-returning-tail-call)) 1855 (error 'simple-parse-error 1856 :format-control format-control 1857 :format-arguments (list string)))) 1858 (with-array-data ((string string :offset-var offset) 1859 (start start) 1860 (end end) 1861 :check-fill-pointer t) 1862 (let ((index (do ((i start (1+ i))) 1863 ((= i end) 1864 (if junk-allowed 1865 (return-from parse-integer (values nil end)) 1866 (parse-error "no non-whitespace characters in string ~S."))) 1867 (declare (fixnum i)) 1868 (unless (whitespace[1]p (char string i)) (return i)))) 1869 (minusp nil) 1870 (found-digit nil) 1871 (result 0)) 1872 (declare (fixnum index)) 1873 (let ((char (char string index))) 1874 (cond ((char= char #\-) 1875 (setq minusp t) 1876 (incf index)) 1877 ((char= char #\+) 1878 (incf index)))) 1879 (loop 1880 (when (= index end) (return nil)) 1881 (let* ((char (char string index)) 1882 (weight (digit-char-p char radix))) 1883 (cond (weight 1884 (setq result (+ weight (* result radix)) 1885 found-digit t)) 1886 (junk-allowed (return nil)) 1887 ((whitespace[1]p char) 1888 (loop 1889 (incf index) 1890 (when (= index end) (return)) 1891 (unless (whitespace[1]p (char string index)) 1892 (parse-error "junk in string ~S"))) 1893 (return nil)) 1894 (t 1895 (parse-error "junk in string ~S")))) 1896 (incf index)) 1897 (values 1898 (if found-digit 1899 (if minusp (- result) result) 1900 (if junk-allowed 1901 nil 1902 (parse-error "no digits in string ~S"))) 1903 (- index offset)))))) 1904 1905;;;; reader initialization code 1906 1907(defun !reader-cold-init () 1908 (!cold-init-constituent-trait-table) 1909 (!cold-init-standard-readtable)) 1910 1911(defmethod print-object ((readtable readtable) stream) 1912 (print-unreadable-object (readtable stream :identity t :type t))) 1913 1914;; Backward-compatibility adapter. The "named-readtables" system in 1915;; Quicklisp expects this interface, and it's a reasonable thing to support. 1916;; What is silly however is that DISPATCH-TABLES was an alist each of whose 1917;; values was a hashtable which got immediately coerced to an alist. 1918;; In anticipation of perhaps not doing an extra re-shaping, if HASH-TABLE-P 1919;; is NIL then return nested alists: ((#\# (#\R . #<FUNCTION SHARP-R>) ...)) 1920(defun dispatch-tables (readtable &optional (hash-table-p t)) 1921 (let (alist) 1922 (flet ((process (char fn &aux (dtable (%dispatch-macro-char-table fn))) 1923 (when dtable 1924 (let ((output (awhen (car dtable) (%hash-table-alist it)))) 1925 (loop for fn across (the simple-vector (cdr dtable)) 1926 and ch from 0 1927 when fn do (push (cons (code-char ch) fn) output)) 1928 (dolist (cell output) ; coerce values to function-designator 1929 (rplacd cell (!cmt-entry-to-fun-designator (cdr cell)))) 1930 (when hash-table-p ; caller wants hash-tables 1931 (setq output (%stuff-hash-table (make-hash-table) output))) 1932 (push (cons char output) alist))))) 1933 (loop for fn across (character-macro-array readtable) and ch from 0 1934 do (process (code-char ch) fn)) 1935 (maphash #'process (character-macro-hash-table readtable))) 1936 alist)) 1937 1938;; Stub - should never get called with anything but NIL 1939;; and only after all macros have been changed to constituents already. 1940(defun (setf dispatch-tables) (new-alist readtable) 1941 (declare (ignore readtable)) 1942 (unless (null new-alist) 1943 (error "Assignment to virtual DISPATCH-TABLES slot not allowed")) 1944 new-alist) 1945 1946;;; like LISTEN, but any whitespace in the input stream will be flushed 1947(defun listen-skip-whitespace (&optional (stream *standard-input*)) 1948 (do ((char (read-char-no-hang stream nil nil nil) 1949 (read-char-no-hang stream nil nil nil))) 1950 ((null char) nil) 1951 (cond ((not (whitespace[1]p char)) 1952 (unread-char char stream) 1953 (return t))))) 1954