1#lang scribble/doc
2@(require "mz.rkt")
3
4@title[#:tag "strings"]{Strings}
5
6@guideintro["strings"]{strings}
7
8A @deftech{string} is a fixed-length array of
9@seclink["characters"]{characters}.
10
11@index['("strings" "immutable")]{A} string can be @defterm{mutable} or
12@defterm{immutable}. When an immutable string is provided to a
13procedure like @racket[string-set!], the
14@exnraise[exn:fail:contract]. String constants generated by the
15default reader (see @secref["parse-string"]) are
16immutable, and they are @tech{interned} in @racket[read-syntax] mode.
17Use @racket[immutable?] to check whether a string is immutable.
18
19Two strings are @racket[equal?] when they have the same length and
20contain the same sequence of characters.
21
22A string can be used as a single-valued sequence (see
23@secref["sequences"]). The characters of the string serve as elements
24of the sequence. See also @racket[in-string].
25
26@see-read-print["string"]{strings}
27
28See also: @racket[immutable?], @racket[symbol->string],
29@racket[bytes->string/utf-8].
30
31@; ----------------------------------------
32@section{String Constructors, Selectors, and Mutators}
33
34@defproc[(string? [v any/c]) boolean?]{ Returns @racket[#t] if @racket[v]
35 is a string, @racket[#f] otherwise.
36
37@mz-examples[(string? "Apple") (string? 'apple)]}
38
39
40@defproc[(make-string [k exact-nonnegative-integer?] [char char?
41#\nul]) string?]{ Returns a new mutable string of length @racket[k] where
42each position in the string is initialized with the character
43@racket[char].
44
45@mz-examples[(make-string 5 #\z)]}
46
47
48@defproc[(string [char char?] ...) string?]{ Returns a new
49mutable string whose length is the number of provided @racket[char]s, and
50whose positions are initialized with the given @racket[char]s.
51
52@mz-examples[(string #\A #\p #\p #\l #\e)]}
53
54
55@defproc[(string->immutable-string [str string?]) (and/c string? immutable?)]{
56Returns an immutable string with the same content as
57 @racket[str], returning @racket[str] itself if @racket[str] is
58 immutable.}
59
60
61@defproc[(string-length [str string?]) exact-nonnegative-integer?]{
62 Returns the length of @racket[str].
63
64@mz-examples[(string-length "Apple")]}
65
66
67@defproc[(string-ref [str string?] [k exact-nonnegative-integer?])
68 char?]{  Returns the character at position @racket[k] in @racket[str].
69 The first position in the string corresponds to @racket[0], so the
70 position @racket[k] must be less than the length of the string,
71 otherwise the @exnraise[exn:fail:contract].
72
73@mz-examples[(string-ref "Apple" 0)]}
74
75
76@defproc[(string-set! [str (and/c string? (not/c immutable?))] [k
77 exact-nonnegative-integer?] [char char?]) void?]{  Changes the
78 character at position @racket[k] in @racket[str] to @racket[char].  The first
79 position in the string corresponds to @racket[0], so the position
80 @racket[k] must be less than the length of the string, otherwise the
81 @exnraise[exn:fail:contract].
82
83@examples[(define s (string #\A #\p #\p #\l #\e))
84          (string-set! s 4 #\y)
85          s]}
86
87
88@defproc[(substring [str string?]
89                    [start exact-nonnegative-integer?]
90                    [end exact-nonnegative-integer? (string-length str)]) string?]{
91 Returns a new mutable string that is @racket[(- end start)]
92 characters long, and that contains the same characters as
93 @racket[str] from @racket[start] inclusive to @racket[end] exclusive.
94 The first position in a string corresponds to @racket[0], so
95 the @racket[start] and @racket[end] arguments must be less than or
96 equal to the length of @racket[str], and @racket[end] must be greater
97 than or equal to @racket[start], otherwise the
98 @exnraise[exn:fail:contract].
99
100@mz-examples[(substring "Apple" 1 3)
101             (substring "Apple" 1)]}
102
103
104@defproc[(string-copy [str string?]) string?]{ Returns
105 @racket[(substring str 0)].
106@examples[(define s1 "Yui")
107          (define pilot (string-copy s1))
108          (list s1 pilot)
109          (for ([i (in-naturals)] [ch '(#\R #\e #\i)])
110            (string-set! pilot i ch))
111          (list s1 pilot)]
112}
113
114
115@defproc[(string-copy! [dest (and/c string? (not/c immutable?))]
116                       [dest-start exact-nonnegative-integer?]
117                       [src string?]
118                       [src-start exact-nonnegative-integer? 0]
119                       [src-end exact-nonnegative-integer? (string-length src)])
120         void?]{
121
122 Changes the characters of @racket[dest] starting at position
123 @racket[dest-start] to match the characters in @racket[src] from
124 @racket[src-start] (inclusive) to @racket[src-end] (exclusive),
125 where the first position in a string corresponds to @racket[0]. The
126 strings @racket[dest] and @racket[src] can be the same string, and in
127 that case the destination region can overlap with the source region;
128 the destination characters after the copy match the source characters
129 from before the copy. If any of @racket[dest-start],
130 @racket[src-start], or @racket[src-end] are out of range (taking into
131 account the sizes of the strings and the source and destination
132 regions), the @exnraise[exn:fail:contract].
133
134@mz-examples[(define s (string #\A #\p #\p #\l #\e))
135             (string-copy! s 4 "y")
136             (string-copy! s 0 s 3 4)
137             s]}
138
139@defproc[(string-fill! [dest (and/c string? (not/c immutable?))] [char
140 char?]) void?]{ Changes @racket[dest] so that every position in the
141 string is filled with @racket[char].
142
143@mz-examples[(define s (string #\A #\p #\p #\l #\e))
144             (string-fill! s #\q)
145             s]}
146
147
148@defproc[(string-append [str string?] ...) string?]{
149
150@index['("strings" "concatenate")]{Returns} a new mutable string that is
151as long as the sum of the given @racket[str]s' lengths, and that
152contains the concatenated characters of the given @racket[str]s. If no
153@racket[str]s are provided, the result is a zero-length string.
154
155@mz-examples[(string-append "Apple" "Banana")]}
156
157
158@defproc[(string-append-immutable [str string?] ...) (and/c string? immutable?)]{
159
160The same as @racket[string-append], but the result is an immutable
161string.
162
163@mz-examples[(string-append-immutable "Apple" "Banana")
164             (immutable? (string-append-immutable "A" "B"))]
165
166@history[#:added "7.5.0.14"]}
167
168
169@defproc[(string->list [str string?]) (listof char?)]{ Returns a new
170 list of characters corresponding to the content of @racket[str]. That is,
171 the length of the list is @racket[(string-length str)], and the
172 sequence of characters in @racket[str] is the same sequence in the
173 result list.
174
175@mz-examples[(string->list "Apple")]}
176
177
178@defproc[(list->string [lst (listof char?)]) string?]{ Returns a new
179 mutable string whose content is the list of characters in @racket[lst].
180 That is, the length of the string is @racket[(length lst)], and
181 the sequence of characters in @racket[lst] is the same sequence in
182 the result string.
183
184@mz-examples[(list->string (list #\A #\p #\p #\l #\e))]}
185
186
187@defproc[(build-string [n exact-nonnegative-integer?]
188                       [proc (exact-nonnegative-integer? . -> . char?)])
189         string?]{
190
191Creates a string of @racket[n] characters by applying @racket[proc] to
192the integers from @racket[0] to @racket[(sub1 n)] in order. If
193@racket[_str] is the resulting string, then @racket[(string-ref _str
194_i)] is the character produced by @racket[(proc _i)].
195
196@mz-examples[
197(build-string 5 (lambda (i) (integer->char (+ i 97))))
198]}
199
200
201@; ----------------------------------------
202@section{String Comparisons}
203
204
205@defproc[(string=? [str1 string?] [str2 string?] ...) boolean?]{ Returns
206 @racket[#t] if all of the arguments are @racket[equal?].
207
208@mz-examples[(string=? "Apple" "apple")
209             (string=? "a" "as" "a")]
210
211@history/arity[]}
212
213@(define (string-sort direction folded?) ; builds the one-sentence description shared by the ordering predicates; direction is the ordering word spliced into it
214(if folded? ; true selects the wording that refers to string-ci<? and case-folding; false the wording that refers to string<?
215  @elem{Like @racket[string-ci<?], but checks whether the arguments would be @direction after case-folding.}
216  @elem{Like @racket[string<?], but checks whether the arguments are @|direction|.}))
217
218@defproc[(string<? [str1 string?] [str2 string?] ...) boolean?]{
219 Returns @racket[#t] if the arguments are lexicographically sorted
220 increasing, where individual characters are ordered by
221 @racket[char<?], @racket[#f] otherwise.
222
223@mz-examples[(string<? "Apple" "apple")
224             (string<? "apple" "Apple")
225             (string<? "a" "b" "c")]
226
227@history/arity[]}
228
229@defproc[(string<=? [str1 string?] [str2 string?] ...) boolean?]{
230 @string-sort["nondecreasing" #f]
231
232@mz-examples[(string<=? "Apple" "apple")
233             (string<=? "apple" "Apple")
234             (string<=? "a" "b" "b")]
235
236@history/arity[]}
237
238@defproc[(string>? [str1 string?] [str2 string?] ...) boolean?]{
239 @string-sort["decreasing" #f]
240
241@mz-examples[(string>? "Apple" "apple")
242             (string>? "apple" "Apple")
243             (string>? "c" "b" "a")]
244
245@history/arity[]}
246
247@defproc[(string>=? [str1 string?] [str2 string?] ...) boolean?]{
248 @string-sort["nonincreasing" #f]
249
250@mz-examples[(string>=? "Apple" "apple")
251             (string>=? "apple" "Apple")
252             (string>=? "c" "b" "b")]
253
254@history/arity[]}
255
256
257@defproc[(string-ci=? [str1 string?] [str2 string?] ...) boolean?]{
258 Returns @racket[#t] if all of the arguments are @racket[equal?] after
259 locale-insensitive case-folding via @racket[string-foldcase].
260
261@mz-examples[(string-ci=? "Apple" "apple")
262             (string-ci=? "a" "a" "a")]
263
264@history/arity[]}
265
266@defproc[(string-ci<? [str1 string?] [str2 string?] ...) boolean?]{
267 Like @racket[string<?], but checks whether the arguments would be in
268 increasing order if each was first case-folded using
269 @racket[string-foldcase] (which is locale-insensitive).
270
271@mz-examples[(string-ci<? "Apple" "apple")
272             (string-ci<? "apple" "banana")
273             (string-ci<? "a" "b" "c")]
274
275@history/arity[]}
276
277@defproc[(string-ci<=? [str1 string?] [str2 string?] ...) boolean?]{
278 @string-sort["nondecreasing" #t]
279
280@mz-examples[(string-ci<=? "Apple" "apple")
281             (string-ci<=? "apple" "Apple")
282             (string-ci<=? "a" "b" "b")]
283
284@history/arity[]}
285
286@defproc[(string-ci>? [str1 string?] [str2 string?] ...) boolean?]{
287 @string-sort["decreasing" #t]
288
289@mz-examples[(string-ci>? "Apple" "apple")
290             (string-ci>? "banana" "Apple")
291             (string-ci>? "c" "b" "a")]
292
293@history/arity[]}
294
295@defproc[(string-ci>=? [str1 string?] [str2 string?] ...) boolean?]{
296 @string-sort["nonincreasing" #t]
297
298@mz-examples[(string-ci>=? "Apple" "apple")
299             (string-ci>=? "apple" "Apple")
300             (string-ci>=? "c" "b" "b")]
301
302@history/arity[]}
303
304@; ----------------------------------------
305@section{String Conversions}
306
307@defproc[(string-upcase [str string?]) string?]{ Returns a string
308 whose characters are the upcase conversion of the characters in
309 @racket[str]. The conversion uses Unicode's locale-independent
310 conversion rules that map code-point sequences to code-point
311 sequences (instead of simply mapping a 1-to-1 function on code points
312 over the string), so the string produced by the conversion can be
313 longer than the input string.
314
315@mz-examples[
316(string-upcase "abc!")
317(string-upcase "Stra\xDFe")
318]}
319
320@defproc[(string-downcase [string string?]) string?]{ Like
321 @racket[string-upcase], but the downcase conversion.
322
323@mz-examples[
324(string-downcase "aBC!")
325(string-downcase "Stra\xDFe")
326(string-downcase "\u039A\u0391\u039F\u03A3")
327(string-downcase "\u03A3")
328]}
329
330
331@defproc[(string-titlecase [string string?]) string?]{ Like
332 @racket[string-upcase], but the titlecase conversion only for the
333 first character in each sequence of cased characters in @racket[string]
334 (ignoring case-ignorable characters).
335
336@mz-examples[
337(string-titlecase "aBC  twO")
338(string-titlecase "y2k")
339(string-titlecase "main stra\xDFe")
340(string-titlecase "stra \xDFe")
341]}
342
343@defproc[(string-foldcase [string string?]) string?]{ Like
344 @racket[string-upcase], but the case-folding conversion.
345
346@mz-examples[
347(string-foldcase "aBC!")
348(string-foldcase "Stra\xDFe")
349(string-foldcase "\u039A\u0391\u039F\u03A3")
350]}
351
352@defproc[(string-normalize-nfd [string string?]) string?]{ Returns a
353string that is the Unicode normalized form D of @racket[string]. If
354the given string is already in the corresponding Unicode normal form,
355the string may be returned directly as the result (instead of a newly
356allocated string).}
357
358@defproc[(string-normalize-nfkd [string string?]) string?]{ Like
359 @racket[string-normalize-nfd], but for normalized form KD.}
360
361@defproc[(string-normalize-nfc [string string?]) string?]{ Like
362 @racket[string-normalize-nfd], but for normalized form C.}
363
364@defproc[(string-normalize-nfkc [string string?]) string?]{ Like
365 @racket[string-normalize-nfd], but for normalized form KC.}
366
367@; ----------------------------------------
368@section{Locale-Specific String Operations}
369
370@defproc[(string-locale=? [str1 string?] [str2 string?] ...)
371 boolean?]{  Like @racket[string=?], but the strings are compared in a
372 locale-specific way, based on the value of @racket[current-locale]. See
373 @secref["encodings"] for more information on locales.
374
375@history/arity[]}
376
377@defproc[(string-locale<? [str1 string?] [str2 string?] ...) boolean?]{
378 Like @racket[string<?], but the sort order compares strings in a
379 locale-specific way, based on the value of @racket[current-locale]. In
380 particular, the sort order may not be simply a lexicographic
381 extension of character ordering.
382
383@history/arity[]}
384
385@defproc[(string-locale>? [str1 string?] [str2 string?] ...)
386 boolean?]{  Like @racket[string>?], but locale-specific like
387 @racket[string-locale<?].
388
389@history/arity[]}
390
391@defproc[(string-locale-ci=? [str1 string?] [str2 string?] ...)
392 boolean?]{  Like @racket[string-locale=?], but strings are compared
393 using rules that are both locale-specific and case-insensitive
394 (depending on what ``case-insensitive'' means for the current
395 locale).
396
397@history/arity[]}
398
399@defproc[(string-locale-ci<? [str1 string?] [str2 string?] ...)
400 boolean?]{  Like @racket[string<?], but both locale-sensitive and
401 case-insensitive like @racket[string-locale-ci=?].
402
403@history/arity[]}
404
405@defproc[(string-locale-ci>? [str1 string?] [str2 string?] ...)
406 boolean?]{  Like @racket[string>?], but both locale-sensitive and
407 case-insensitive like @racket[string-locale-ci=?].
408
409@history/arity[]}
410
411@defproc[(string-locale-upcase [string string?]) string?]{ Like
412 @racket[string-upcase], but using locale-specific case-conversion
413 rules based on the value of @racket[current-locale].}
414
415@defproc[(string-locale-downcase [string string?]) string?]{ Like
416 @racket[string-downcase], but using locale-specific case-conversion
417 rules based on the value of @racket[current-locale].
418}
419
420@; ----------------------------------------
421@section{Additional String Functions}
422
423@note-lib[racket/string]
424@(define string-eval (make-base-eval))
425@examples[#:hidden #:eval string-eval (require racket/string racket/list)]
426
427@defproc[(string-append* [str string?] ... [strs (listof string?)]) string?]{
428@; Note: this is exactly the same description as the one for append*
429
430Like @racket[string-append], but the last argument is used as a list
431of arguments for @racket[string-append], so @racket[(string-append*
432str ... strs)] is the same as @racket[(apply string-append str
433... strs)].  In other words, the relationship between
434@racket[string-append] and @racket[string-append*] is similar to the
435one between @racket[list] and @racket[list*].
436
437@mz-examples[#:eval string-eval
438  (string-append* "a" "b" '("c" "d"))
439  (string-append* (cdr (append* (map (lambda (x) (list ", " x))
440                                     '("Alpha" "Beta" "Gamma")))))
441]}
442
443
444@defproc[(string-join [strs (listof string?)] [sep string? " "]
445                      [#:before-first before-first string? ""]
446                      [#:before-last  before-last  string? sep]
447                      [#:after-last   after-last   string? ""])
448         string?]{
449
450Appends the strings in @racket[strs], inserting @racket[sep] between
451each pair of strings in @racket[strs].  @racket[before-last],
452@racket[before-first], and @racket[after-last] are analogous to the
453inputs of @racket[add-between]: they specify an alternate separator
454between the last two strings, a prefix string, and a suffix string
455respectively.
456
457@mz-examples[#:eval string-eval
458  (string-join '("one" "two" "three" "four"))
459  (string-join '("one" "two" "three" "four") ", ")
460  (string-join '("one" "two" "three" "four") " potato ")
461  (string-join '("x" "y" "z") ", "
462               #:before-first "Todo: "
463               #:before-last " and "
464               #:after-last ".")
465]}
466
467
468@defproc[(string-normalize-spaces [str string?]
469                                  [sep (or/c string? regexp?) #px"\\s+"]
470                                  [space string? " "]
471                                  [#:trim? trim? any/c #t]
472                                  [#:repeat? repeat? any/c #f])
473         string?]{
474
475Normalizes spaces in the input @racket[str] by trimming it (using
476@racket[string-trim] and @racket[sep]) and replacing all whitespace
477sequences in the result with @racket[space], which defaults to a
478single space.
479
480@mz-examples[#:eval string-eval
481  (string-normalize-spaces "  foo bar  baz \r\n\t")
482]
483
484The result of @racket[(string-normalize-spaces str sep space)] is the same
485as @racket[(string-join (string-split str sep ....) space)].}
486
487
488@defproc[(string-replace [str  string?]
489                         [from (or/c string? regexp?)]
490                         [to   string?]
491                         [#:all? all? any/c #t])
492         string?]{
493
494Returns @racket[str] with all occurrences of @racket[from] replaced
495by @racket[to]. If @racket[from] is a string, it is matched
496literally (as opposed to being used as a @tech{regular expression}).
497
498By default, all occurrences are replaced, but only the first match is
499replaced if @racket[all?] is @racket[#f].
500
501@mz-examples[#:eval string-eval
502  (string-replace "foo bar baz" "bar" "blah")
503]}
504
505
506@defproc[(string-split [str string?]
507                       [sep (or/c string? regexp?) #px"\\s+"]
508                       [#:trim? trim? any/c #t]
509                       [#:repeat? repeat? any/c #f])
510         (listof string?)]{
511
512Splits the input @racket[str] on @racket[sep], returning a list of
513substrings of @racket[str] that are separated by @racket[sep], defaulting
514to splitting the input on whitespace. The
515input is first trimmed using @racket[sep] (see @racket[string-trim]),
516unless @racket[trim?] is @racket[#f]. Empty matches are handled in the
517same way as for @racket[regexp-split]. As a special case, if
518@racket[str] is the empty string after trimming, the result is
519@racket['()] instead of @racket['("")].
520
521Like @racket[string-trim], provide @racket[sep] to use a different separator,
522and @racket[repeat?]  controls matching repeated sequences.
523
524@mz-examples[#:eval string-eval
525  (string-split "  foo bar  baz \r\n\t")
526  (string-split "  ")
527  (string-split "  " #:trim? #f)
528]}
529
530
531@defproc[(string-trim [str string?]
532                      [sep (or/c string? regexp?) #px"\\s+"]
533                      [#:left? left? any/c #t]
534                      [#:right? right? any/c #t]
535                      [#:repeat? repeat? any/c #f])
536         string?]{
537
538Trims the input @racket[str] by removing prefix and suffix @racket[sep],
539which defaults to whitespace. A string @racket[sep] is matched literally
540(as opposed to being used as a @tech{regular expression}).
541
542Use @racket[#:left? #f] or @racket[#:right? #f] to suppress trimming
543the corresponding side.  When @racket[repeat?] is @racket[#f] (the
544default), only one match is removed from each side; when
545@racket[repeat?] is true, all initial or trailing matches are
546trimmed (which is an alternative to using a @tech{regular expression}
547@racket[sep] that contains @litchar{+}).
548
549@mz-examples[#:eval string-eval
550  (string-trim "  foo bar  baz \r\n\t")
551  (string-trim "  foo bar  baz \r\n\t" " " #:repeat? #t)
552  (string-trim "aaaxaayaa" "aa")
553]}
554
555@defproc[(non-empty-string? [x any/c]) boolean?]{
556Returns @racket[#t] if @racket[x] is a string and is not empty;
557returns @racket[#f] otherwise.
558@history[#:added "6.3"]{}
559}
560
561@deftogether[(
562@defproc[(string-contains? [s string?] [contained string?]) boolean?]
563@defproc[(string-prefix? [s string?] [prefix string?]) boolean?]
564@defproc[(string-suffix? [s string?] [suffix string?]) boolean?])]{
565Checks whether @racket[s] includes at any location, starts with, or ends with
566the second argument, respectively.
567
568@mz-examples[#:eval string-eval
569  (string-prefix? "Racket" "R")
570  (string-prefix? "Jacket" "R")
571  (string-suffix? "Racket" "et")
572  (string-contains? "Racket" "ack")
573]
574
575@history[#:added "6.3"]{}
576}
577
578
579@; ----------------------------------------
580@include-section["format.scrbl"]
581
582@; ----------------------------------------
583@close-eval[string-eval]
584