1;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*- 2;;;; Jim Blandy <jimb@red-bean.com> --- September 1999 3;;;; 4;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010, 5;;;; 2012, 2013, 2014 Free Software Foundation, Inc. 6;;;; 7;;;; This library is free software; you can redistribute it and/or 8;;;; modify it under the terms of the GNU Lesser General Public 9;;;; License as published by the Free Software Foundation; either 10;;;; version 3 of the License, or (at your option) any later version. 11;;;; 12;;;; This library is distributed in the hope that it will be useful, 13;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of 14;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15;;;; Lesser General Public License for more details. 16;;;; 17;;;; You should have received a copy of the GNU Lesser General Public 18;;;; License along with this library; if not, write to the Free Software 19;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 21(define-module (test-suite test-regexp) 22 #:use-module (test-suite lib) 23 #:use-module (srfi srfi-1) 24 #:use-module (ice-9 regex)) 25 26(when (defined? 'setlocale) 27 (setlocale LC_ALL "C")) 28 29;; Don't fail if we can't display a test name to stdout/stderr. 30(set-port-conversion-strategy! (current-output-port) 'escape) 31(set-port-conversion-strategy! (current-error-port) 'escape) 32 33 34;;; Run a regexp-substitute or regexp-substitute/global test, once 35;;; providing a real port and once providing #f, requesting direct 36;;; string output. 37(define (vary-port func expected . args) 38 (pass-if "port is string port" 39 (equal? expected 40 (call-with-output-string 41 (lambda (port) 42 (apply func port args))))) 43 (pass-if "port is #f" 44 (equal? expected 45 (apply func #f args)))) 46 47(define (object->string obj) 48 (call-with-output-string 49 (lambda (port) 50 (write obj port)))) 51 52;;; 53;;; make-regexp 54;;; 55 56(with-test-prefix "make-regexp" 57 58 (pass-if-exception "no args" exception:wrong-num-args 59 (make-regexp)) 60 61 (pass-if-exception "bad pat arg" exception:wrong-type-arg 62 (make-regexp 'blah)) 63 64 ;; in guile prior to 1.6.5 make-regex didn't validate its flags args 65 (pass-if-exception "bad arg 2" exception:wrong-type-arg 66 (make-regexp "xyz" 'abc)) 67 68 (pass-if-exception "bad arg 3" exception:wrong-type-arg 69 (make-regexp "xyz" regexp/icase 'abc))) 70 71;;; 72;;; match:string 73;;; 74 75(with-test-prefix "match:string" 76 77 (pass-if "foo" 78 (string=? "foo" (match:string (string-match ".*" "foo")))) 79 80 (pass-if "foo offset 1" 81 (string=? "foo" (match:string (string-match ".*" "foo" 1))))) 82 83;;; 84;;; regexp-exec 85;;; 86 87(with-test-prefix "regexp-exec" 88 89 (pass-if-exception "non-integer offset" exception:wrong-type-arg 90 (let ((re (make-regexp "ab+"))) 91 (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg))) 92 93 (pass-if-exception "non-string input" exception:wrong-type-arg 94 (let ((re (make-regexp "ab+"))) 95 (regexp-exec re 'not-a-string))) 96 97 (pass-if-exception "non-string input, with offset" exception:wrong-type-arg 98 (let ((re (make-regexp "ab+"))) 99 (regexp-exec re 'not-a-string 5))) 100 101 ;; in guile 1.8.1 and earlier, a #\nul character in the input string was 102 ;; only detected in a critical section, and the resulting error throw 103 ;; abort()ed the program 104 (pass-if-exception "nul in input" exception:string-contains-nul 105 (let ((re (make-regexp "ab+"))) 106 (regexp-exec re (string #\a #\b (integer->char 0))))) 107 108 ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected 109 ;; inside a critical section, and the resulting error throw abort()ed the 110 ;; program 111 (pass-if-exception "non-integer flags" exception:wrong-type-arg 112 (let ((re (make-regexp "ab+"))) 113 (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg)))) 114 115;;; 116;;; fold-matches 117;;; 118 119(with-test-prefix "fold-matches" 120 121 (pass-if "without flags" 122 (equal? '("hello") 123 (fold-matches "^[a-z]+$" "hello" '() 124 (lambda (match result) 125 (cons (match:substring match) 126 result))))) 127 128 (pass-if "with flags" 129 ;; Prior to 1.8.6, passing an additional flag would not work. 130 (null? 131 (fold-matches "^[a-z]+$" "hello" '() 132 (lambda (match result) 133 (cons (match:substring match) 134 result)) 135 (logior regexp/notbol regexp/noteol)))) 136 137 (pass-if "regexp/notbol is set correctly" 138 (equal? '("foo") 139 (fold-matches "^foo" "foofoofoofoo" '() 140 (lambda (match result) 141 (cons (match:substring match) 142 result)))))) 143 144 145;;; 146;;; regexp-quote 147;;; 148 149(define-syntax with-ascii-or-latin1-locale 150 (syntax-rules () 151 ((_ chr body ...) 152 (if (> chr 127) 153 (with-latin1-locale body ...) 154 (begin body ...))))) 155 156(define char-code-limit 256) 157 158(with-test-prefix "regexp-quote" 159 160 (pass-if-exception "no args" exception:wrong-num-args 161 (regexp-quote)) 162 163 (pass-if-exception "bad string arg" exception:wrong-type-arg 164 (regexp-quote 'blah)) 165 166 (let ((lst `((regexp/basic ,regexp/basic) 167 (regexp/extended ,regexp/extended))) 168 ;; String of all latin-1 characters, except #\nul which doesn't 169 ;; work because it's the usual end-of-string for the underlying 170 ;; C regexec(). 171 (allchars (list->string (map integer->char (cdr (iota 256)))))) 172 (for-each 173 (lambda (elem) 174 (let ((name (car elem)) 175 (flag (cadr elem))) 176 177 (with-test-prefix name 178 179 ;; Try on each individual latin-1 character, except #\nul. 180 (do ((i 1 (1+ i))) 181 ((>= i 256)) 182 (let* ((c (integer->char i)) 183 (s (string c))) 184 (pass-if (list "char" i (format #f "~s ~s" c s)) 185 (with-ascii-or-latin1-locale i 186 (let* ((q (regexp-quote s)) 187 (m (regexp-exec (make-regexp q flag) s))) 188 (and (= 0 (match:start m)) 189 (= 1 (match:end m)))))))) 190 191 ;; Try on pattern "aX" where X is each latin-1 character, 192 ;; except #\nul. This exposes things like "?" which are 193 ;; special only when they follow a pattern to repeat or 194 ;; whatever ("a" in this case). 195 (do ((i 1 (1+ i))) 196 ((>= i 256)) 197 (let* ((c (integer->char i)) 198 (s (string #\a c)) 199 (q (regexp-quote s))) 200 (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q)) 201 (with-ascii-or-latin1-locale i 202 (let* ((m (regexp-exec (make-regexp q flag) s))) 203 (and (= 0 (match:start m)) 204 (= 2 (match:end m)))))))) 205 206 (pass-if "string of all chars" 207 (with-latin1-locale 208 (let ((m (regexp-exec (make-regexp (regexp-quote allchars) 209 flag) 210 allchars))) 211 (and (= 0 (match:start m)) 212 (= (string-length allchars) (match:end m))))))))) 213 lst))) 214 215;;; 216;;; regexp-substitute 217;;; 218 219(with-test-prefix "regexp-substitute" 220 (let ((match 221 (string-match "patleft(sub1)patmid(sub2)patright" 222 "contleftpatleftsub1patmidsub2patrightcontright"))) 223 (define (try expected . args) 224 (with-test-prefix (object->string args) 225 (apply vary-port regexp-substitute expected match args))) 226 227 (try "") 228 (try "string1" "string1") 229 (try "string1string2" "string1" "string2") 230 (try "patleftsub1patmidsub2patright" 0) 231 (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye") 232 (try "sub1" 1) 233 (try "hi-sub1-bye" "hi-" 1 "-bye") 234 (try "hi-sub2-bye" "hi-" 2 "-bye") 235 (try "contleft" 'pre) 236 (try "contright" 'post) 237 (try "contrightcontleft" 'post 'pre) 238 (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre) 239 (try "contrightsub2sub1contleft" 'post 2 1 'pre) 240 (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar"))) 241 242(with-test-prefix "regexp-substitute/global" 243 244 (define (try expected . args) 245 (with-test-prefix (object->string args) 246 (apply vary-port regexp-substitute/global expected args))) 247 248 (try "hi" "a(x*)b" "ab" "hi") 249 (try "" "a(x*)b" "ab" 1) 250 (try "xx" "a(x*)b" "axxb" 1) 251 (try "xx" "a(x*)b" "_axxb_" 1) 252 (try "pre" "a(x*)b" "preaxxbpost" 'pre) 253 (try "post" "a(x*)b" "preaxxbpost" 'post) 254 (try "string" "x" "string" 'pre "y" 'post) 255 (try "4" "a(x*)b" "_axxb_" (lambda (m) 256 (number->string (match:end m 1)))) 257 258 (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post) 259 260 ;; This should not go into an infinite loop, just because the regexp 261 ;; can match the empty string. This test also kind of beats on our 262 ;; definition of where a null string can match. 263 (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post) 264 265 ;; These kind of bother me. The extension from regexp-substitute to 266 ;; regexp-substitute/global is only natural if your item list 267 ;; includes both pre and post. If those are required, why bother 268 ;; to include them at all? 269 (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_" 270 (lambda (m) (number->string (match:end m 1))) ":" 271 'post) 272 (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_" 273 (lambda (m) (number->string (match:end m 1))) ":" 274 'post 275 ":" (lambda (m) (number->string (match:end m 1)))) 276 277 ;; Jan Nieuwenhuizen's bug, 2 Sep 1999 278 (try "" "_" (make-string 500 #\_) 279 'post)) 280 281(with-test-prefix "nonascii locales" 282 (pass-if "match structures refer to char offsets" 283 (with-locale "en_US.utf8" 284 ;; bug #31650 285 (equal? (match:substring (string-match ".*" "calçot") 0) 286 "calçot"))) 287 288 (pass-if "match structures refer to char offsets, non-ASCII pattern" 289 (with-locale "en_US.utf8" 290 ;; bug #31650 291 (equal? (match:substring (string-match "λ: The Ultimate (.*)" 292 "λ: The Ultimate GOTO") 293 1) 294 "GOTO")))) 295