1 // Written in the D programming language.
2
3 /**
4 String handling functions.
5
6 $(SCRIPT inhibitQuickIndex = 1;)
7
8 $(DIVC quickindex,
9 $(BOOKTABLE ,
10 $(TR $(TH Category) $(TH Functions) )
11 $(TR $(TDNW Searching)
12 $(TD
13 $(MYREF column)
14 $(MYREF indexOf)
15 $(MYREF indexOfAny)
16 $(MYREF indexOfNeither)
17 $(MYREF lastIndexOf)
18 $(MYREF lastIndexOfAny)
19 $(MYREF lastIndexOfNeither)
20 )
21 )
22 $(TR $(TDNW Comparison)
23 $(TD
24 $(MYREF isNumeric)
25 )
26 )
27 $(TR $(TDNW Mutation)
28 $(TD
29 $(MYREF capitalize)
30 )
31 )
32 $(TR $(TDNW Pruning and Filling)
33 $(TD
34 $(MYREF center)
35 $(MYREF chomp)
36 $(MYREF chompPrefix)
37 $(MYREF chop)
38 $(MYREF detabber)
39 $(MYREF detab)
40 $(MYREF entab)
41 $(MYREF entabber)
42 $(MYREF leftJustify)
43 $(MYREF outdent)
44 $(MYREF rightJustify)
45 $(MYREF strip)
46 $(MYREF stripLeft)
47 $(MYREF stripRight)
48 $(MYREF wrap)
49 )
50 )
51 $(TR $(TDNW Substitution)
52 $(TD
53 $(MYREF abbrev)
54 $(MYREF soundex)
55 $(MYREF soundexer)
56 $(MYREF succ)
57 $(MYREF tr)
58 $(MYREF translate)
59 )
60 )
61 $(TR $(TDNW Miscellaneous)
62 $(TD
63 $(MYREF assumeUTF)
64 $(MYREF fromStringz)
65 $(MYREF lineSplitter)
66 $(MYREF representation)
67 $(MYREF splitLines)
68 $(MYREF toStringz)
69 )
70 )))
71
72 Objects of types `string`, `wstring`, and `dstring` are value types
73 and cannot be mutated element-by-element. For using mutation during building
74 strings, use `char[]`, `wchar[]`, or `dchar[]`. The `xxxstring`
75 types are preferable because they don't exhibit undesired aliasing, thus
76 making code more robust.
77
78 The following functions are publicly imported:
79
80 $(BOOKTABLE ,
81 $(TR $(TH Module) $(TH Functions) )
82 $(LEADINGROW Publicly imported functions)
83 $(TR $(TD std.algorithm)
84 $(TD
85 $(REF_SHORT cmp, std,algorithm,comparison)
86 $(REF_SHORT count, std,algorithm,searching)
87 $(REF_SHORT endsWith, std,algorithm,searching)
88 $(REF_SHORT startsWith, std,algorithm,searching)
89 ))
90 $(TR $(TD std.array)
91 $(TD
92 $(REF_SHORT join, std,array)
93 $(REF_SHORT replace, std,array)
94 $(REF_SHORT replaceInPlace, std,array)
95 $(REF_SHORT split, std,array)
96 $(REF_SHORT empty, std,array)
97 ))
98 $(TR $(TD std.format)
99 $(TD
100 $(REF_SHORT format, std,format)
101 $(REF_SHORT sformat, std,format)
102 ))
103 $(TR $(TD std.uni)
104 $(TD
105 $(REF_SHORT icmp, std,uni)
106 $(REF_SHORT toLower, std,uni)
107 $(REF_SHORT toLowerInPlace, std,uni)
108 $(REF_SHORT toUpper, std,uni)
109 $(REF_SHORT toUpperInPlace, std,uni)
110 ))
111 )
112
113 There is a rich set of functions for string handling defined in other modules.
114 Functions related to Unicode and ASCII are found in $(MREF std, uni)
115 and $(MREF std, ascii), respectively. Other functions that have a
116 wider generality than just strings can be found in $(MREF std, algorithm)
117 and $(MREF std, range).
118
119 See_Also:
120 $(LIST
121 $(MREF std, algorithm) and
122 $(MREF std, range)
123 for generic range algorithms
124 ,
125 $(MREF std, ascii)
126 for functions that work with ASCII strings
127 ,
128 $(MREF std, uni)
        for functions that work with Unicode strings
130 )
131
132 Copyright: Copyright The D Language Foundation 2007-.
133
134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
135
136 Authors: $(HTTP digitalmars.com, Walter Bright),
137 $(HTTP erdani.org, Andrei Alexandrescu),
138 $(HTTP jmdavisprog.com, Jonathan M Davis),
139 and David L. 'SpottedTiger' Davis
140
141 Source: $(PHOBOSSRC std/string.d)
142
143 */
144 module std.string;
145
version (StdUnittest)
{
private:
    // Non-copyable wrapper that exposes its payload as a `string` only through
    // `alias this`; lets the tests feed string-convertible types to functions.
    struct TestAliasedString
    {
        string get() @safe @nogc pure nothrow return scope { return _s; }
        alias get this;
        @disable this(this);
        string _s;
    }

    // Calls `func` once with `s` wrapped in a `TestAliasedString` and once with
    // the plain string, and reports whether both calls gave equal results.
    bool testAliasedString(alias func, Args...)(string s, Args args)
    {
        import std.algorithm.comparison : equal;

        auto viaWrapper = func(TestAliasedString(s), args);
        auto viaString = func(s, args);

        // For ranges, compare contents instead of object identity.
        enum bool comparableAsRanges = is(typeof(equal(viaWrapper, viaString)));
        static if (comparableAsRanges)
            return equal(viaWrapper, viaString);
        else
            return viaWrapper == viaString;
    }
}
173
174 public import std.format : format, sformat;
175 import std.typecons : Flag, Yes, No;
176 public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace;
177
178 import std.meta : AliasSeq, staticIndexOf;
179 import std.range.primitives : back, ElementEncodingType, ElementType, front,
180 hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite,
181 isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put,
182 save;
183 import std.traits : isConvertibleToString, isNarrowString, isSomeChar,
184 isSomeString, StringTypeOf, Unqual;
185
186 //public imports for backward compatibility
187 public import std.algorithm.comparison : cmp;
188 public import std.algorithm.searching : startsWith, endsWith, count;
189 public import std.array : join, replace, replaceInPlace, split, empty;
190
191 /* ************* Exceptions *************** */
192
/++
    Exception thrown on errors in std.string functions.

    Its constructors are generated by mixing in
    $(REF basicExceptionCtors, std,exception).
 +/
class StringException : Exception
{
    import std.exception : basicExceptionCtors;

    ///
    mixin basicExceptionCtors;
}
203
///
@safe pure unittest
{
    import std.exception : assertThrown;
    // `outdent` is expected to reject this input with a `StringException`
    // (presumably because the lines do not share a common indentation).
    auto bad = " a\n\tb\n c";
    assertThrown!StringException(bad.outdent);
}
211
/++
    Converts a null-terminated C string into a D slice without copying.

    Params:
        cString = A null-terminated C-style string, or `null`.

    Returns: A D-style array of `char`, `wchar` or `dchar` that covers the
    characters before the terminating `'\0'` and keeps the type qualifiers of
    the input. `null` is returned when `cString` is `null`.

    $(RED Important Note:) The returned array is a slice of the original buffer.
    The original data is not changed and not copied.
+/
inout(Char)[] fromStringz(Char)(return scope inout(Char)* cString) @nogc @system pure nothrow
if (isSomeChar!Char)
{
    import core.stdc.stddef : wchar_t;

    // Pick a length routine: the C runtime where it has one for this
    // character width, otherwise a plain scan for the terminator.
    static if (is(immutable Char == immutable char))
    {
        import core.stdc.string : cstrlen = strlen;
    }
    else static if (is(immutable Char == immutable wchar_t))
    {
        import core.stdc.wchar_ : cstrlen = wcslen;
    }
    else
    {
        static size_t cstrlen(scope const Char* s)
        {
            size_t count = 0;
            while (s[count])
                ++count;
            return count;
        }
    }

    if (!cString)
        return null;
    return cString[0 .. cstrlen(cString)];
}
242
/// ditto
inout(Char)[] fromStringz(Char)(return scope inout(Char)[] cString) @nogc @safe pure nothrow
if (isSomeChar!Char)
{
    // Length of the prefix that precedes the first NUL; the whole array when
    // it contains no NUL at all.
    size_t prefixLength = 0;
    while (prefixLength < cString.length && cString[prefixLength] != '\0')
        ++prefixLength;

    return cString[0 .. prefixLength];
}
253
///
@system pure unittest
{
    // Pointer overload: the slice stops right before the terminating NUL.
    assert(fromStringz("foo\0"c.ptr) == "foo"c);
    assert(fromStringz("foo\0"w.ptr) == "foo"w);
    assert(fromStringz("foo\0"d.ptr) == "foo"d);

    // Same check with a non-ASCII character in each encoding.
    assert(fromStringz("福\0"c.ptr) == "福"c);
    assert(fromStringz("福\0"w.ptr) == "福"w);
    assert(fromStringz("福\0"d.ptr) == "福"d);
}
265
///
@nogc @safe pure nothrow unittest
{
    // Array overload applied to fixed-size buffers embedded in structs:
    // only the text before the first NUL is returned.
    struct C
    {
        char[32] name;
    }
    assert(C("foo\0"c).name.fromStringz() == "foo"c);

    struct W
    {
        wchar[32] name;
    }
    assert(W("foo\0"w).name.fromStringz() == "foo"w);

    struct D
    {
        dchar[32] name;
    }
    assert(D("foo\0"d).name.fromStringz() == "foo"d);
}
287
@nogc @safe pure nothrow unittest
{
    // Default-initialized (null) strings yield an empty result.
    assert( string.init.fromStringz() == ""c);
    assert(wstring.init.fromStringz() == ""w);
    assert(dstring.init.fromStringz() == ""d);

    // Arrays that contain no NUL at all are returned in full.
    immutable  char[3] a = "foo"c;
    assert(a.fromStringz() == "foo"c);

    immutable wchar[3] b = "foo"w;
    assert(b.fromStringz() == "foo"w);

    immutable dchar[3] c = "foo"d;
    assert(c.fromStringz() == "foo"d);
}
303
@system pure unittest
{
    // Null pointers of every character width map to a null slice.
    char* a = null;
    assert(fromStringz(a) == null);
    wchar* b = null;
    assert(fromStringz(b) == null);
    dchar* c = null;
    assert(fromStringz(c) == null);

    // const and immutable pointers of every character width are accepted.
    const char* d = "foo\0";
    assert(fromStringz(d) == "foo");

    immutable char* e = "foo\0";
    assert(fromStringz(e) == "foo");

    const wchar* f = "foo\0";
    assert(fromStringz(f) == "foo");

    immutable wchar* g = "foo\0";
    assert(fromStringz(g) == "foo");

    const dchar* h = "foo\0";
    assert(fromStringz(h) == "foo");

    immutable dchar* i = "foo\0";
    assert(fromStringz(i) == "foo");

    immutable wchar z = 0x0000;
    // Test some surrogate pairs
    // high surrogates are in the range 0xD800 .. 0xDC00
    // low surrogates are in the range  0xDC00 .. 0xE000
    // since UTF16 doesn't specify endianness we test both.
    foreach (wchar[] t; [[0xD800, 0xDC00], [0xD800, 0xE000], [0xDC00, 0xDC00],
            [0xDC00, 0xE000], [0xDA00, 0xDE00]])
    {
        immutable hi = t[0], lo = t[1];
        assert(fromStringz([hi, lo, z].ptr) == [hi, lo]);
        assert(fromStringz([lo, hi, z].ptr) == [lo, hi]);
    }
}
344
/++
    Params:
        s = A D-style string.

    Returns: A C-style null-terminated string equivalent to `s`. `s`
    must not contain embedded `'\0'`'s as any C function will treat the
    first `'\0'` that it sees as the end of the string. If `s.empty` is
    `true`, then a string containing only `'\0'` is returned.

    $(RED Important Note:) When passing a `char*` to a C function, and the C
    function keeps it around for any reason, make sure that you keep a
    reference to it in your D code. Otherwise, it may become invalid during a
    garbage collection cycle and cause a nasty bug when the C code tries to use
    it.
+/
immutable(char)* toStringz(scope const(char)[] s) @trusted pure nothrow
out (result)
{
    import core.stdc.string : strlen;
    if (result)
    {
        // Trailing NULs of `s` cannot be told apart from the terminator on the
        // C side, so they are ignored when the lengths are compared.
        auto slen = s.length;
        while (slen > 0 && s[slen-1] == 0) --slen;
        assert(strlen(result) == slen,
                "The result C string and the input string differ in length");
        assert(result[0 .. slen] == s[0 .. slen],
                "The input and result string are not equal");
    }
}
do
{
    import std.exception : assumeUnique;

    // An empty input needs no allocation: a string literal is already
    // null-terminated.
    if (s.empty) return "".ptr;

    /+ Unfortunately, this isn't reliable.
     We could make this work if string literals are put
     in read-only memory and we test if s[] is pointing into
     that.

     /* Peek past end of s[], if it's 0, no conversion necessary.
     * Note that the compiler will put a 0 past the end of static
     * strings, and the storage allocator will put a 0 past the end
     * of newly allocated char[]'s.
     */
     char* p = &s[0] + s.length;
     if (*p == 0)
     return s;
     +/

    // Need to make a copy
    auto copy = new char[s.length + 1];
    copy[0 .. s.length] = s[];
    copy[s.length] = 0;

    // `copy` is not referenced anywhere else, so it can be treated as immutable.
    return &assumeUnique(copy)[0];
}
402
///
pure nothrow @system unittest
{
    import core.stdc.string : strlen;
    import std.conv : to;

    auto p = toStringz("foo");
    assert(strlen(p) == 3);
    // A slice taken from the middle of a string gets its own terminator.
    const(char)[] foo = "abbzxyzzy";
    p = toStringz(foo[3 .. 5]);
    assert(strlen(p) == 2);

    // Empty input: the result points at a lone NUL.
    string test = "";
    p = toStringz(test);
    assert(*p == 0);

    test = "\0";
    p = toStringz(test);
    assert(*p == 0);

    // Input that already ends with a NUL.
    test = "foo\0";
    p = toStringz(test);
    assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);

    const string test2 = "";
    p = toStringz(test2);
    assert(*p == 0);

    // Both empty inputs return the very same pointer.
    assert(toStringz([]) is toStringz(""));
}
433
pure nothrow @system unittest // https://issues.dlang.org/show_bug.cgi?id=15136
{
    // The character array is immediately followed by another field, so the
    // result must be a copy and must not rely on the byte after the array.
    static struct S
    {
        immutable char[5] str;
        ubyte foo;
        this(char[5] str) pure nothrow
        {
            this.str = str;
        }
    }
    auto s = S("01234");
    const str = s.str.toStringz;
    assert(str !is s.str.ptr);
    assert(*(str + 5) == 0); // Null terminated.
    s.foo = 42;
    assert(*(str + 5) == 0); // Still null terminated.
}
452
453
/**
    Flag indicating whether a search is case-sensitive.

    It is an alias of `Flag!"caseSensitive"`, so its two values are written
    `Yes.caseSensitive` and `No.caseSensitive`.
 */
alias CaseSensitive = Flag!"caseSensitive";
458
/++
    Searches for character in range.

    Params:
        s = string or InputRange of characters to search in correct UTF format
        c = character to search for
        startIdx = starting index to a well-formed code point
        cs = `Yes.caseSensitive` or `No.caseSensitive`

    Returns:
        the index of the first occurrence of `c` in `s` with
        respect to the start index `startIdx`. If `c`
        is not found, then `-1` is returned.
        If `c` is found the value of the returned index is at least
        `startIdx`.
        The index counts code units of `s`, not code points.
        If the parameters are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    See_Also: $(REF countUntil, std,algorithm,searching)
  +/
ptrdiff_t indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Non-string character ranges: forward to the shared implementation.
    return _indexOf(s, c, cs);
}

/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Character arrays: forward to the shared implementation.
    return _indexOf(s, c, cs);
}

/// Ditto
ptrdiff_t indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Non-string character ranges with a start index.
    return _indexOf(s, c, startIdx, cs);
}

/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Character arrays with a start index.
    return _indexOf(s, c, startIdx, cs);
}
509
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, 'W') == 6);
    // A character that does not occur yields -1.
    assert(indexOf(s, 'Z') == -1);
    // Case-insensitive search: 'w' finds the upper-case 'W'.
    assert(indexOf(s, 'w', No.caseSensitive) == 6);
}
520
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // The result is an index into `s`, not an offset from the start index.
    assert(indexOf(s, 'W', 4) == 6);
    // A start index past the end of `s` yields -1.
    assert(indexOf(s, 'Z', 100) == -1);
    assert(indexOf(s, 'w', 3, No.caseSensitive) == 6);
}
531
@safe pure unittest
{
    // Types that convert to string through `alias this`.
    assert(testAliasedString!indexOf("std/string.d", '/'));

    // Enums with a string base type.
    enum S : string { a = "std/string.d" }
    assert(S.a.indexOf('/') == 3);

    // Static arrays.
    char[S.a.length] sa = S.a[];
    assert(sa.indexOf('/') == 3);
}
542
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;
    import std.utf : byChar, byWchar, byDchar;

    // The whole body is wrapped in a delegate handed to `assertCTFEable`.
    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        assert(indexOf(cast(S) null, cast(dchar)'a') == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
        assert(indexOf(to!S("def"), cast(dchar)'f') == 2);

        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0);
        assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2);
        assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2);
        assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2);
    }}

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // The expected index depends on the encoding because it counts code
        // units: U+10143 takes 4 UTF-8 units, 2 UTF-16 units and 1 UTF-32 unit.
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);

        assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6);

        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l',      cs) == 2);
        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7);
        assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8);

        // Needles outside the BMP searched in a UTF-16 range.
        assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5);
        assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1);
    }

    char[10] fixedSizeArray = "0123456789";
    assert(indexOf(fixedSizeArray, '2') == 2);
    });
}
593
@safe pure unittest
{
    // Start-index overloads with `alias this` types, string enums and static
    // arrays; a start index beyond the only '/' must yield -1.
    assert(testAliasedString!indexOf("std/string.d", '/', 0));
    assert(testAliasedString!indexOf("std/string.d", '/', 1));
    assert(testAliasedString!indexOf("std/string.d", '/', 4));

    enum S : string { a = "std/string.d" }
    assert(S.a.indexOf('/', 0) == 3);
    assert(S.a.indexOf('/', 1) == 3);
    assert(S.a.indexOf('/', 4) == -1);

    char[S.a.length] sa = S.a[];
    assert(sa.indexOf('/', 0) == 3);
    assert(sa.indexOf('/', 1) == 3);
    assert(sa.indexOf('/', 4) == -1);
}
610
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;
    import std.utf : byCodeUnit, byChar, byWchar;

    // Non-string ranges, including a start index past the end of the range.
    assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1);

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);

        assert((to!S("def")).indexOf(cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 12,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
                No.caseSensitive) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2);

        // The start index is also passed as `uint` and `ulong` here.
        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', cast(uint) 2,
            No.caseSensitive) == 2);
        assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1,
            No.caseSensitive) == 2);
    }}

    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
            == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
            == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
            == 6);
    }
}
656
/*
Shared implementation of the character-search `indexOf` overloads.

Returns the code-unit index of the first occurrence of `c` in `s`, or -1 when
there is none. The strategy is chosen at compile time from the size of the
range's code unit and at run time from `cs` and the value of `c`.
*/
private ptrdiff_t _indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    static import std.ascii;
    static import std.uni;
    import std.utf : byDchar, byCodeUnit, UTFException, codeLength;
    alias Char = Unqual!(ElementEncodingType!Range);

    if (cs == Yes.caseSensitive)
    {
        // Fast path: an ASCII needle in a string of 1-byte code units is
        // handed to C `memchr`. Skipped during CTFE.
        static if (Char.sizeof == 1 && isSomeString!Range)
        {
            if (std.ascii.isASCII(c) && !__ctfe)
            {                                               // Plain old ASCII
                static ptrdiff_t trustedmemchr(Range s, char c) @trusted
                {
                    import core.stdc.string : memchr;
                    const p = cast(const(Char)*)memchr(s.ptr, c, s.length);
                    return p ? p - s.ptr : -1;
                }

                return trustedmemchr(s, cast(char) c);
            }
        }

        static if (Char.sizeof == 1)
        {
            if (c <= 0x7F)
            {
                // The needle fits in one code unit: compare unit by unit.
                ptrdiff_t i;
                foreach (const c2; s)
                {
                    if (c == c2)
                        return i;
                    ++i;
                }
            }
            else
            {
                // Multi-unit needle: decode `s` and advance the index by the
                // encoded length of every code point that is passed over.
                ptrdiff_t i;
                foreach (const c2; s.byDchar())
                {
                    if (c == c2)
                        return i;
                    i += codeLength!Char(c2);
                }
            }
        }
        else static if (Char.sizeof == 2)
        {
            if (c <= 0xFFFF)
            {
                // The needle fits in one UTF-16 code unit.
                ptrdiff_t i;
                foreach (const c2; s)
                {
                    if (c == c2)
                        return i;
                    ++i;
                }
            }
            else if (c <= 0x10FFFF)
            {
                // Encode UTF-16 surrogate pair
                const wchar c1 = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
                const wchar c2 = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00);
                ptrdiff_t i;
                for (auto r = s.byCodeUnit(); !r.empty; r.popFront())
                {
                    if (c1 == r.front)
                    {
                        r.popFront();
                        if (r.empty)    // invalid UTF - missing second of pair
                            break;
                        if (c2 == r.front)
                            return i;
                        // Account for the unit that followed the first half.
                        ++i;
                    }
                    ++i;
                }
            }
            // Values above 0x10FFFF fall through to the `return -1` below.
        }
        else static if (Char.sizeof == 4)
        {
            // One code unit per code point: plain linear scan.
            ptrdiff_t i;
            foreach (const c2; s)
            {
                if (c == c2)
                    return i;
                ++i;
            }
        }
        else
            static assert(0);
        return -1;
    }
    else
    {
        if (std.ascii.isASCII(c))
        {                                                   // Plain old ASCII
            immutable c1 = cast(char) std.ascii.toLower(c);

            // Compare code units after ASCII lower-casing both sides.
            ptrdiff_t i;
            foreach (const c2; s.byCodeUnit())
            {
                if (c1 == std.ascii.toLower(c2))
                    return i;
                ++i;
            }
        }
        else
        {                                                   // c is a universal character
            immutable c1 = std.uni.toLower(c);

            // Decode `s`; the index advances by the encoded length of the
            // original (not lower-cased) code point.
            ptrdiff_t i;
            foreach (const c2; s.byDchar())
            {
                if (c1 == std.uni.toLower(c2))
                    return i;
                i += codeLength!Char(c2);
            }
        }
    }
    return -1;
}
781
/*
Start-index variant of the character search.

Skips the first `startIdx` code units of `s`, searches the remainder with
`indexOf` and translates the hit back into an index relative to the start of
`s`. Returns -1 when `startIdx` lies at or beyond the end of `s` or when `c`
is not found.
*/
private ptrdiff_t _indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    enum bool canSlice = isSomeString!(typeof(s)) ||
        (hasSlicing!(typeof(s)) && hasLength!(typeof(s)));

    static if (canSlice)
    {
        // Sliceable input: drop the prefix in one step.
        if (startIdx >= s.length)
            return -1;
        immutable ptrdiff_t relative = indexOf(s[startIdx .. $], c, cs);
    }
    else
    {
        // Plain input range: pop the prefix one element at a time.
        for (size_t skipped = 0; skipped < startIdx; ++skipped)
        {
            if (s.empty)
                return -1;
            s.popFront();
        }
        immutable ptrdiff_t relative = indexOf(s, c, cs);
    }

    if (relative == -1)
        return -1;
    return relative + cast(ptrdiff_t) startIdx;
}
813
/*
Shared implementation of the substring-search `indexOf` overloads.

The case sensitivity `cs` is a compile-time parameter. The inner function
returns the code-unit index of the first occurrence of `sub` in `s`, or -1
when there is none.

For strings the work is delegated to `countUntil` (same code unit type,
case-sensitive) or to `find`. Any other forward range is decoded code point
by code point and searched with a nested loop.
*/
private template _indexOfStr(CaseSensitive cs)
{
    private ptrdiff_t _indexOfStr(Range, Char)(Range s, const(Char)[] sub)
    if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
        isSomeChar!Char)
    {
        alias Char1 = Unqual!(ElementEncodingType!Range);

        static if (isSomeString!Range)
        {
            static if (is(Char1 == Char) && cs == Yes.caseSensitive)
            {
                // Same encoding and exact match: compare raw code units.
                import std.algorithm.searching : countUntil;
                return s.representation.countUntil(sub.representation);
            }
            else
            {
                import std.algorithm.searching : find;

                const(Char1)[] balance;
                static if (cs == Yes.caseSensitive)
                {
                    balance = find(s, sub);
                }
                else
                {
                    balance = find!
                        ((a, b) => toLower(a) == toLower(b))
                        (s, sub);
                }
                // `balance` is a slice of `s`, so the pointer difference is
                // the code-unit index of the match.
                return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } ();
            }
        }
        else
        {
            if (s.empty)
                return -1;
            if (sub.empty)
                return 0;                   // degenerate case

            import std.utf : byDchar, codeLength;
            auto subr = sub.byDchar;        // decode sub[] by dchar's
            dchar sub0 = subr.front;        // cache first character of sub[]
            subr.popFront();

            // Special case for single character search
            if (subr.empty)
                return indexOf(s, sub0, cs);

            static if (cs == No.caseSensitive)
                sub0 = toLower(sub0);

            /* Classic double nested loop search algorithm
             */
            ptrdiff_t index = 0;            // count code unit index into s
            for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront())
            {
                // Keep the code point exactly as it occurs in `s`: `index`
                // counts code units of `s`, and case folding can change the
                // encoded length (U+212A KELVIN SIGN lower-cases to 'k').
                const dchar c1 = sbydchar.front;
                dchar c2 = c1;
                static if (cs == No.caseSensitive)
                    c2 = toLower(c2);
                if (c2 == sub0)
                {
                    auto s2 = sbydchar.save;        // why s must be a forward range
                    foreach (c; subr.save)
                    {
                        s2.popFront();
                        // `s` ran out while `sub` still has characters left:
                        // no later position can match either.
                        if (s2.empty)
                            return -1;
                        static if (cs == Yes.caseSensitive)
                        {
                            if (c != s2.front)
                                goto Lnext;
                        }
                        else
                        {
                            if (toLower(c) != toLower(s2.front))
                                goto Lnext;
                        }
                    }
                    return index;
                }
            Lnext:
                index += codeLength!Char1(c1);
            }
            return -1;
        }
    }
}
902
/++
    Searches for substring in `s`.

    Params:
        s = string or ForwardRange of characters to search in correct UTF format
        sub = substring to search for
        startIdx = the index into s to start searching from
        cs = `Yes.caseSensitive` (default) or `No.caseSensitive`

    Returns:
        the index of the first occurrence of `sub` in `s` with
        respect to the start index `startIdx`. If `sub` is not found,
        then `-1` is returned.
        If the arguments are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.
        If `sub` is found the value of the returned index is at least
        `startIdx`.

    Throws:
        If the sequence starting at `startIdx` does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case-insensitive searches where the mapping of
        tolower and toupper is not 1:1.
  +/
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // Without a flag the search is case-sensitive.
    return _indexOfStr!(Yes.caseSensitive)(s, sub);
}
935
/// Ditto
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub, in CaseSensitive cs)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // The implementation takes the case sensitivity as a compile-time
    // argument, so the run-time flag is dispatched here.
    if (cs != Yes.caseSensitive)
        return _indexOfStr!(No.caseSensitive)(s, sub);

    return indexOf(s, sub);
}
946
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (startIdx < s.length)
    {
        // Search the tail and translate a hit back into an index into `s`.
        immutable ptrdiff_t relative = indexOf(s[startIdx .. $], sub);
        if (relative != -1)
            return relative + cast(ptrdiff_t) startIdx;
    }
    return -1;
}
960
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    ptrdiff_t result = -1;
    if (startIdx < s.length)
    {
        // Search the tail; a hit is relative to `startIdx`.
        immutable ptrdiff_t relative = indexOf(s[startIdx .. $], sub, cs);
        if (relative != -1)
            result = relative + cast(ptrdiff_t) startIdx;
    }
    return result;
}
974
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // The result is an index into `s`, not an offset from the start index.
    assert(indexOf(s, "Wo", 4) == 6);
    // A start index past the end of `s` yields -1.
    assert(indexOf(s, "Zo", 100) == -1);
    assert(indexOf(s, "wo", 3, No.caseSensitive) == 6);
}
985
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(indexOf(s, "Wo") == 6);
    // A substring that does not occur yields -1.
    assert(indexOf(s, "Zo") == -1);
    // Case-insensitive search ignores the case of both operands.
    assert(indexOf(s, "wO", No.caseSensitive) == 6);
}
996
// The attributes on this test require the case-sensitive string search to be
// usable from `@safe pure nothrow @nogc` code.
@safe pure nothrow @nogc unittest
{
    string s = "Hello World";
    assert(indexOf(s, "Wo", 4) == 6);
    assert(indexOf(s, "Zo", 100) == -1);
    assert(indexOf(s, "Wo") == 6);
    assert(indexOf(s, "Zo") == -1);
}
1005
// Overload for types that are not themselves forward ranges of characters
// but have a `StringTypeOf`: the call is forwarded with `Range` explicitly
// instantiated as that string type.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub);
}
1013
// Same forwarding overload for string-convertible types, with an explicit
// case-sensitivity flag.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub,
        in CaseSensitive cs)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub, cs);
}
1022
@safe pure nothrow @nogc unittest
{
    // Substring search through a type that converts to string via `alias this`.
    assert(testAliasedString!indexOf("std/string.d", "string"));
}
1027
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    // The whole body is wrapped in a delegate handed to `assertCTFEable`.
    // Every haystack string type S is combined with every needle type T.
    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            assert(indexOf(cast(S) null, to!T("a")) == -1);
            assert(indexOf(to!S("def"), to!T("a")) == -1);
            assert(indexOf(to!S("abba"), to!T("a")) == 0);
            assert(indexOf(to!S("def"), to!T("f")) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);

            assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0);
            assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                           to!T("page-break-before"), No.caseSensitive) == -1);
        }}

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // The expected index counts code units of the haystack encoding.
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
        }
    }
    });
}
1080
// Substring search in a non-string forward range (`byWchar`).
@safe pure @nogc nothrow
unittest
{
    import std.traits : EnumMembers;
    import std.utf : byWchar;

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // An empty haystack yields -1 even for an empty needle, while an
        // empty needle in a non-empty haystack is found at index 0.
        assert(indexOf("".byWchar, "", cs) == -1);
        assert(indexOf("hello".byWchar, "", cs) == 0);
        assert(indexOf("hello".byWchar, "l", cs) == 2);
        assert(indexOf("heLLo".byWchar, "LL", cs) == 2);
        assert(indexOf("hello".byWchar, "lox", cs) == -1);
        assert(indexOf("hello".byWchar, "betty", cs) == -1);
        assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7);
    }
}
1098
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    // Start-index overloads for every combination of haystack type S and
    // needle type T.
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            assert(indexOf(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);

            assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6);
            // On failure the message reports the actual index and both types.
            assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9,
                to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive))
                ~ " " ~ S.stringof ~ " " ~ T.stringof);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), 10,
                No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                        to!T("page-break-before"), 10, No.caseSensitive) == -1);

            // In order for indexOf with and without index to be consistent
            assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
        }}

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // The expected index counts code units of the haystack encoding.
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
                3, cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
                3, cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
                3, cs) == 6);
        }
    }
}
1157
/++
    Finds the last occurrence of the character `c` in `s`.

    Params:
        s = string to search
        c = character to search for
        startIdx = code unit index into `s` that bounds the search: only
            the slice $(D s[0 .. startIdx]) is examined
        cs = `Yes.caseSensitive` or `No.caseSensitive`; indicates whether
            the comparisons are case sensitive

    Returns:
        The code unit index of the last occurrence of `c` in `s` (or in
        $(D s[0 .. startIdx]) when `startIdx` is given). If `c` is not
        found, or `startIdx` is greater than `s.length`, then `-1` is
        returned.

    Throws:
        If the sequence ending at `startIdx` does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.
  +/
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    static import std.ascii, std.uni;
    import std.utf : canSearchInCodeUnits;

    if (cs == Yes.caseSensitive && canSearchInCodeUnits!Char(c))
    {
        // Compare raw code units, walking from the end towards the front.
        for (size_t pos = s.length; pos-- > 0;)
        {
            if (s[pos] == c)
                return pos;
        }
    }
    else if (cs == Yes.caseSensitive)
    {
        // Decode code points from the back; `pos` is the code unit index at
        // which the decoded code point starts.
        foreach_reverse (pos, dchar decoded; s)
        {
            if (decoded == c)
                return pos;
        }
    }
    else if (std.ascii.isASCII(c))
    {
        // ASCII target: lower-case each code unit with the ASCII rules and
        // compare without decoding.
        immutable wanted = std.ascii.toLower(c);

        for (size_t pos = s.length; pos-- > 0;)
        {
            if (std.ascii.toLower(s[pos]) == wanted)
                return pos;
        }
    }
    else
    {
        // Non-ASCII target: decode and compare the std.uni lower-case forms.
        immutable wanted = std.uni.toLower(c);

        foreach_reverse (pos, dchar decoded; s)
        {
            if (std.uni.toLower(decoded) == wanted)
                return pos;
        }
    }

    return -1;
}
1238
/// Ditto
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    // A bound past the end of `s` is reported as "not found".
    if (startIdx > s.length)
        return -1;

    // Search only the prefix that ends (exclusively) at `startIdx`.
    return lastIndexOf(s[0 .. startIdx], c, cs);
}
1251
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // The last 'l' is the one in "World".
    assert(lastIndexOf(s, 'l') == 9);
    // A character that does not occur yields -1.
    assert(lastIndexOf(s, 'Z') == -1);
    // With No.caseSensitive, 'L' also matches the lower-case 'l'.
    assert(lastIndexOf(s, 'L', No.caseSensitive) == 9);
}
1262
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // Only s[0 .. 4] ("Hell") is searched.
    assert(lastIndexOf(s, 'l', 4) == 3);
    // A startIdx beyond the end of the string yields -1.
    assert(lastIndexOf(s, 'Z', 1337) == -1);
    // Only s[0 .. 7] ("Hello W") is searched, ignoring case.
    assert(lastIndexOf(s, 'L', 7, No.caseSensitive) == 3);
}
1273
// lastIndexOf(s, c[, cs]) for string, wstring and dstring haystacks. The body
// is wrapped in assertCTFEable.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // Case-sensitive (default) searches.
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        assert(lastIndexOf(to!S("abba"), 'a') == 3);
        assert(lastIndexOf(to!S("def"), 'f') == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö') == 0);

        // Case-insensitive searches.
        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3);
        assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
        assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
            No.caseSensitive) == 1);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2);
        assert(lastIndexOf(sPlts, 'M', No.caseSensitive) == 34);
        assert(lastIndexOf(sPlts, 'S', No.caseSensitive) == 40);
    }}

    // Expected indices are code unit offsets, so they depend on the encoding
    // of the haystack literal.
    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
    });
}
1313
// lastIndexOf(s, c, startIdx[, cs]) for string, wstring and dstring
// haystacks, mixed with a few calls that pass no startIdx.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
        assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);

        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        // startIdx is passed as a ushort here.
        assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3,
                to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive)));
        assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        // startIdx (4) is larger than the length of "def", so -1 is expected.
        assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1);
        assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34);
        assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40);
    }}

    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
}
1346
/++
    Finds the last occurrence of the substring `sub` in `s`.

    Params:
        s = string to search
        sub = substring to search for
        startIdx = code unit index into `s` that bounds the search: only
            the slice $(D s[0 .. startIdx]) is examined
        cs = `Yes.caseSensitive` or `No.caseSensitive`; indicates whether
            the comparisons are case sensitive

    Returns:
        The code unit index in `s` at which the last occurrence of `sub`
        starts. If `sub` is not found, if `sub` is empty, or if `startIdx`
        is greater than `s.length`, then `-1` is returned.

    Throws:
        If the sequence ending at `startIdx` does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.
  +/
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    import std.algorithm.searching : endsWith;
    import std.range.primitives : walkLength;
    static import std.uni;
    import std.utf : strideBack;

    if (sub.empty)
        return -1;

    // Number of code points in `sub`; a single code point is delegated to
    // the character overload.
    immutable size_t subPoints = walkLength(sub);
    if (subPoints == 1)
        return lastIndexOf(s, sub.front, cs);

    // Given that the last `points` code points of `prefix` matched `sub`,
    // returns the code unit index at which that match starts.
    //
    // The start is found by stepping back over the matched code points of
    // the haystack itself rather than by subtracting the transcoded length of
    // `sub`: under case-insensitive comparison the two lengths can differ
    // (for example, U+212A KELVIN SIGN lower-cases to 'k', which is shorter
    // in UTF-8 and the same length in UTF-16/32), and subtracting the wrong
    // length yields an index inside a code point or a negative value other
    // than -1.
    static ptrdiff_t matchStart(const(Char1)[] prefix, size_t points) @safe pure
    {
        size_t start = prefix.length;
        foreach (_; 0 .. points)
            start -= strideBack(prefix, start);
        return cast(ptrdiff_t) start;
    }

    if (cs == Yes.caseSensitive)
    {
        static if (is(immutable Char1 == immutable Char2))
        {
            import core.stdc.string : memcmp;

            // Same code unit type: compare code units directly. Look for the
            // first code unit of `sub`, then compare the remainder.
            immutable c = sub[0];

            // When `sub` is longer than `s` the unsigned difference wraps and
            // becomes negative as ptrdiff_t, so the loop body never runs.
            for (ptrdiff_t i = s.length - sub.length; i >= 0; --i)
            {
                if (s[i] == c)
                {
                    if (__ctfe)
                    {
                        // memcmp is not used during CTFE; compare slices.
                        if (s[i + 1 .. i + sub.length] == sub[1 .. $])
                            return i;
                    }
                    else
                    {
                        auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted
                        {
                            return memcmp(s1, s2, n);
                        }
                        if (trustedMemcmp(&s[i + 1], &sub[1],
                                (sub.length - 1) * Char1.sizeof) == 0)
                            return i;
                    }
                }
            }
        }
        else
        {
            // Different code unit types: repeatedly test whether the current
            // prefix ends with `sub`, then drop one code point from the end.
            for (size_t i = s.length; !s.empty;)
            {
                if (s.endsWith(sub))
                    return matchStart(s, subPoints);

                i -= strideBack(s, i);
                s = s[0 .. i];
            }
        }
    }
    else
    {
        // Case-insensitive: same prefix-shrinking scan, comparing the
        // std.uni lower-case form of each pair of code points.
        for (size_t i = s.length; !s.empty;)
        {
            if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
                (s, sub))
            {
                return matchStart(s, subPoints);
            }

            i -= strideBack(s, i);
            s = s[0 .. i];
        }
    }

    return -1;
}
1440
/// Ditto
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // A bound past the end of `s` is reported as "not found".
    if (startIdx > s.length)
        return -1;

    // Search only the prefix that ends (exclusively) at `startIdx`.
    return lastIndexOf(s[0 .. startIdx], sub, cs);
}
1453
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // The result is the index at which the match starts.
    assert(lastIndexOf(s, "ll") == 2);
    // A substring that does not occur yields -1.
    assert(lastIndexOf(s, "Zo") == -1);
    // With No.caseSensitive, "lL" matches "ll".
    assert(lastIndexOf(s, "lL", No.caseSensitive) == 2);
}
1464
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // Only s[0 .. 4] ("Hell") is searched.
    assert(lastIndexOf(s, "ll", 4) == 2);
    // A startIdx beyond the end of the string yields -1.
    assert(lastIndexOf(s, "Zo", 128) == -1);
    // s[0 .. 3] ("Hel") contains only one 'l', so "lL" cannot match.
    assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1);
}
1475
// Empty haystack and/or empty needle: lastIndexOf returns -1 in all three
// combinations.
@safe pure unittest
{
    import std.conv : to;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto r = to!S("").lastIndexOf("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").lastIndexOf("");
        assert(r == -1, to!string(r));

        r = to!S("").lastIndexOf("");
        assert(r == -1, to!string(r));
    }}
}
1492
// lastIndexOf(s, sub[, cs]) for every combination of haystack type S and
// needle type T. The body is wrapped in assertCTFEable.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Failure message naming the S/T pair under test.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive (default) searches.
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);

            // Case-insensitive searches.
            assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr);

            assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr);
            assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr);
            assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr);
            assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr);
        }}

        // Expected indices are code unit offsets, so they depend on the
        // encoding of the haystack literal.
        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
        }
    }
    });
}
1552
// https://issues.dlang.org/show_bug.cgi?id=13529
// Multi code point needles containing non-ASCII characters, for every
// combination of haystack type T and needle type S.
@safe pure unittest
{
    import std.conv : to;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            enum typeStr = S.stringof ~ " " ~ T.stringof;
            // "ö ö" occurs at the end of the haystack and must be found.
            auto idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö"));
            assert(idx != -1, to!string(idx) ~ " " ~ typeStr);

            // "ö öd" does not occur and must yield -1.
            idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd"));
            assert(idx == -1, to!string(idx) ~ " " ~ typeStr);
        }}
    }
}
1570
// lastIndexOf(s, sub, startIdx[, cs]) for every combination of haystack type
// S and needle type T.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Failure message naming the S/T pair under test.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive (default) searches.
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
                format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr);
            assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
                    to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472

            // Case-insensitive searches.
            assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~
                " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 3, No.caseSensitive)));
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr);
            // The overloads with and without startIdx must agree on empty input.
            assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr);
        }}

        // startIdx and the expected index are code unit offsets, so they
        // depend on the encoding of the haystack literal.
        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
        }
    }
}
1619
// https://issues.dlang.org/show_bug.cgi?id=20783
// The `enum` initializer forces lastIndexOf to be evaluated at compile time.
@safe pure @nogc unittest
{
    // "ab" does not occur in "aa" even though the first code unit matches.
    enum lastIndex = "aa".lastIndexOf("ab");
    assert(lastIndex == -1);
}
1626
// Compile-time evaluation (via `enum`): the trailing "hell h" is only a
// partial match, so the second "hello", at index 6, is the expected result.
@safe pure @nogc unittest
{
    enum lastIndex = "hello hello hell h".lastIndexOf("hello");
    assert(lastIndex == 6);
}
1632
// Shared implementation behind indexOfAny, lastIndexOfAny and indexOfNeither.
//
// Template flags:
//   forward - true: scan from the front and report the first hit;
//             false: scan from the back and report the last hit.
//   any     - true: a hit is a code point that occurs in `needles`;
//             false: a hit is a code point that does not occur in `needles`.
//
// Returns the code unit index in `haystack` at which the hit starts, or -1
// when there is no hit.
private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)(
    const(Char)[] haystack, const(Char2)[] needles,
    in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    import std.algorithm.searching : canFind, findAmong;

    if (cs == Yes.caseSensitive)
    {
        static if (any && forward)
        {
            // findAmong returns the tail of `haystack` starting at the first
            // hit; an empty tail means there was none.
            immutable size_t remaining = haystack.findAmong(needles).length;
            return remaining ? haystack.length - remaining : -1;
        }
        else static if (any)
        {
            import std.range : retro;
            import std.utf : strideBack;

            // Length of what is left of the reversed search's underlying
            // slice; stepping back one code point from there gives the start
            // of the hit.
            immutable size_t end = haystack.retro.findAmong(needles).source.length;
            if (end)
            {
                return end - haystack.strideBack(end);
            }
        }
        else static if (forward)
        {
            foreach (pos, dchar point; haystack)
            {
                if (!canFind(needles, point))
                    return pos;
            }
        }
        else
        {
            foreach_reverse (pos, dchar point; haystack)
            {
                if (!canFind(needles, point))
                    return pos;
            }
        }
    }
    else
    {
        import std.range.primitives : walkLength;

        if (needles.length <= 16 && needles.walkLength(17))
        {
            // Non-empty needle set of at most 16 code units: lower-case it
            // once into a fixed-size buffer and search that buffer.
            dchar[16] lowered = void;
            size_t used = 0;
            foreach (dchar needle; needles)
            {
                lowered[used++] = toLower(needle);
            }

            static if (forward)
            {
                foreach (pos, dchar point; haystack)
                {
                    if (canFind(lowered[0 .. used], toLower(point)) == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar point; haystack)
                {
                    if (canFind(lowered[0 .. used], toLower(point)) == any)
                        return pos;
                }
            }
        }
        else
        {
            // Empty or larger needle set: lower-case each needle on the fly
            // while comparing against the already lower-cased haystack code
            // point.
            static bool lowerEquals(dchar needle, dchar loweredPoint)
            {
                return toLower(needle) == loweredPoint;
            }

            static if (forward)
            {
                foreach (pos, dchar point; haystack)
                {
                    if (canFind!lowerEquals(needles, toLower(point)) == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar point; haystack)
                {
                    if (canFind!lowerEquals(needles, toLower(point)) == any)
                        return pos;
                }
            }
        }
    }

    return -1;
}
1748
/**
    Returns the index of the first occurrence of any of the elements in $(D
    needles) in `haystack`. If no element of `needles` is found,
    then `-1` is returned. The `startIdx` slices `haystack` in the
    following way $(D haystack[startIdx .. $]). `startIdx` represents a
    codeunit index in `haystack`. If the sequence starting at `startIdx`
    does not represent a well formed codepoint, then a
    $(REF UTFException, std,utf) may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            the startIdx is greater than or equal to the length of haystack,
            the function returns `-1`.
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan front to back, looking for characters that are in `needles`.
    enum bool forward = true;
    enum bool any = true;

    return indexOfAnyNeitherImpl!(forward, any)(haystack, needles, cs);
}
1772
/// Ditto
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing to search when the start lies at or beyond the end.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the tail-relative index back into an
    // index into the full `haystack`.
    immutable ptrdiff_t relative = indexOfAny(haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
1789
///
@safe pure unittest
{
    import std.conv : to;

    // 'W' at index 5 is the first character that is in "Wr".
    ptrdiff_t i = "helloWorld".indexOfAny("Wr");
    assert(i == 5);
    // The result is a code unit index: 'ö' and 'ä' take two code units each
    // in UTF-8, so the first 'l' is at index 4.
    i = "öällo world".indexOfAny("lo ");
    assert(i == 4, to!string(i));
}
1800
///
@safe pure unittest
{
    import std.conv : to;

    // The search starts at code unit index 4; the result is still an index
    // into the whole string.
    ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4);
    assert(i == 5);

    // Code unit index: 'ö' and 'ä' take two code units each in UTF-8.
    i = "Foo öällo world".indexOfAny("lh", 3);
    assert(i == 8, to!string(i));
}
1812
// Empty haystack and/or empty needle set: indexOfAny returns -1 in all three
// combinations.
@safe pure unittest
{
    import std.conv : to;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto r = to!S("").indexOfAny("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").indexOfAny("");
        assert(r == -1, to!string(r));

        r = to!S("").indexOfAny("");
        assert(r == -1, to!string(r));
    }}
}
1829
// indexOfAny(haystack, needles[, cs]) for every combination of haystack type
// S and needle type T. The body is wrapped in assertCTFEable.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {
            // Case-sensitive (default) searches.
            assert(indexOfAny(cast(S) null, to!T("a")) == -1);
            assert(indexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(indexOfAny(to!S("abba"), to!T("a")) == 0);
            assert(indexOfAny(to!S("def"), to!T("f")) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1);

            // Case-insensitive searches.
            assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 0);
            assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive)
                == -1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"),
                No.caseSensitive) == 0);
            // A needle set longer than 16 code units.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 0);

            assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
        }
    }
    }
    );
}
1868
// indexOfAny(haystack, needles, startIdx[, cs]) for every combination of
// haystack type S and needle type T.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {
            // Case-sensitive (default) searches.
            assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1);
            assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3);
            assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6);

            // Case-insensitive searches.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("DRS"), 2,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("SI"), 3,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1,
                No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2,
                No.caseSensitive) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4,
                No.caseSensitive) == 4);
            assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9,
                No.caseSensitive) == 9);

            assert(indexOfAny("\u0100", to!T("\u0100"), 0,
                No.caseSensitive) == 0);
        }

        // startIdx and the expected index are code unit offsets, so they
        // depend on the encoding of the haystack literal.
        foreach (cs; EnumMembers!CaseSensitive)
        {
            assert(indexOfAny("hello\U00010143\u0100\U00010143",
                to!S("e\u0100"), 3, cs) == 9);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"w,
                to!S("h\u0100"), 3, cs) == 7);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"d,
                to!S("l\u0100"), 5, cs) == 6);
        }
    }
}
1915
/**
    Returns the index of the last occurrence of any of the elements in $(D
    needles) in `haystack`. If no element of `needles` is found,
    then `-1` is returned. The `stopIdx` slices `haystack` in the
    following way $(D haystack[0 .. stopIdx]). `stopIdx` represents a
    codeunit index in `haystack`. If the sequence ending at `stopIdx` does
    not represent a well formed codepoint, then a
    $(REF UTFException, std,utf) may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If
            the stopIdx is greater than the length of haystack, the function
            returns `-1`.
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan back to front, looking for characters that are in `needles`.
    enum bool forward = false;
    enum bool any = true;

    return indexOfAnyNeitherImpl!(forward, any)(haystack, needles, cs);
}
1940
/// Ditto
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // A bound past the end of `haystack` is reported as "not found".
    if (stopIdx > haystack.length)
        return -1;

    // Search only the prefix that ends (exclusively) at `stopIdx`.
    return lastIndexOfAny(haystack[0 .. stopIdx], needles, cs);
}
1954
///
@safe pure unittest
{
    // The 'l' at index 8 is the last character that is in "Wlo".
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo");
    assert(i == 8);

    // Code unit index: 'ö' and 'ä' take two code units each in UTF-8, so the
    // last 'ö' starts at index 8.
    i = "Foo öäöllo world".lastIndexOfAny("öF");
    assert(i == 8);
}
1964
///
@safe pure unittest
{
    import std.conv : to;

    // Only "hell" (haystack[0 .. 4]) is searched.
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4);
    assert(i == 3);

    // Only "Foo" (haystack[0 .. 3]) is searched.
    i = "Foo öäöllo world".lastIndexOfAny("öF", 3);
    assert(i == 0);
}
1976
// Empty haystack and/or empty needle set: lastIndexOfAny returns -1 in all
// three combinations.
@safe pure unittest
{
    import std.conv : to;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto r = to!S("").lastIndexOfAny("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").lastIndexOfAny("");
        assert(r == -1, to!string(r));

        r = to!S("").lastIndexOfAny("");
        assert(r == -1, to!string(r));
    }}
}
1993
// lastIndexOfAny(haystack, needles[, cs]) for every combination of haystack
// type S and needle type T. The body is wrapped in assertCTFEable.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive (default) searches.
            assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6);

            // 'ö' takes two code units in a string but one in a wstring or
            // dstring, which shifts the expected index of the final 'f'.
            ptrdiff_t oeIdx = 9;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 8;
            }

            auto foundOeIdx = lastIndexOfAny(to!S("dfeffgföf"), to!T("feg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive searches.
            assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("FBI"),
                No.caseSensitive) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == -1);

            // Same encoding-dependent shift for the 'd' that follows 'ö'.
            oeIdx = 2;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 1;
            }
            assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"),
                No.caseSensitive) == oeIdx);

            assert(lastIndexOfAny("\u0100", to!T("\u0100"),
                No.caseSensitive) == 0);
        }}
    }
    }
    );
}
2046
// lastIndexOfAny(haystack, needles, stopIdx[, cs]) for every combination of
// haystack type S and needle type T. The body is wrapped in assertCTFEable.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Failure message naming the S/T pair under test.
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive (default) searches.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0,
                typeStr);

            // Case-insensitive searches.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5,
                No.caseSensitive) == 3, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6,
                No.caseSensitive) == 5, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2,
                No.caseSensitive) == 0, typeStr);
        }}
    }
    }
    );
}
2098
/**
    Returns the index of the first occurrence of any character in
    `haystack` that is not an element of `needles`. If all elements of
    `haystack` are elements of `needles`, `-1` is returned.

    Params:
        haystack = String to search for needles in.
        needles = Characters to skip over in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            the startIdx is greater than or equal to the length of haystack,
            the function returns `-1`.
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Scan front to back, looking for characters that are not in `needles`.
    enum bool forward = true;
    enum bool any = false;

    return indexOfAnyNeitherImpl!(forward, any)(haystack, needles, cs);
}
2119
/// Ditto
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // Nothing to search when the start lies at or beyond the end.
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the tail-relative index back into an
    // index into the full `haystack`.
    immutable ptrdiff_t relative = indexOfAnyNeitherImpl!(true, false)(
        haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
2138
2139 ///
2140 @safe pure unittest
2141 {
2142 assert(indexOfNeither("abba", "a", 2) == 2);
2143 assert(indexOfNeither("def", "de", 1) == 2);
2144 assert(indexOfNeither("dfefffg", "dfe", 4) == 6);
2145 }
2146
2147 ///
2148 @safe pure unittest
2149 {
2150 assert(indexOfNeither("def", "a") == 0);
2151 assert(indexOfNeither("def", "de") == 2);
2152 assert(indexOfNeither("dfefffg", "dfe") == 6);
2153 }
2154
2155 @safe pure unittest
2156 {
2157 import std.conv : to;
2158
2159 static foreach (S; AliasSeq!(string, wstring, dstring))
2160 {{
2161 auto r = to!S("").indexOfNeither("hello");
2162 assert(r == -1, to!string(r));
2163
2164 r = to!S("hello").indexOfNeither("");
2165 assert(r == 0, to!string(r));
2166
2167 r = to!S("").indexOfNeither("");
2168 assert(r == -1, to!string(r));
2169 }}
2170 }
2171
2172 @safe pure unittest
2173 {
2174 import std.conv : to;
2175 import std.exception : assertCTFEable;
2176
2177 assertCTFEable!(
2178 {
2179 static foreach (S; AliasSeq!(string, wstring, dstring))
2180 {
2181 static foreach (T; AliasSeq!(string, wstring, dstring))
2182 {
2183 assert(indexOfNeither(cast(S) null, to!T("a")) == -1);
2184 assert(indexOfNeither("abba", "a") == 1);
2185
2186 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"),
2187 No.caseSensitive) == 0);
2188 assert(indexOfNeither(to!S("def"), to!T("D"),
2189 No.caseSensitive) == 1);
2190 assert(indexOfNeither(to!S("ABca"), to!T("a"),
2191 No.caseSensitive) == 1);
2192 assert(indexOfNeither(to!S("def"), to!T("f"),
2193 No.caseSensitive) == 0);
2194 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"),
2195 No.caseSensitive) == 6);
2196 if (is(S == string))
2197 {
2198 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
2199 No.caseSensitive) == 8,
2200 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
2201 No.caseSensitive)));
2202 }
2203 else
2204 {
2205 assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
2206 No.caseSensitive) == 7,
2207 to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
2208 No.caseSensitive)));
2209 }
2210 }
2211 }
2212 }
2213 );
2214 }
2215
2216 @safe pure unittest
2217 {
2218 import std.conv : to;
2219 import std.exception : assertCTFEable;
2220
2221 assertCTFEable!(
2222 {
2223 static foreach (S; AliasSeq!(string, wstring, dstring))
2224 {
2225 static foreach (T; AliasSeq!(string, wstring, dstring))
2226 {
2227 assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1);
2228 assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1,
2229 to!string(indexOfNeither(to!S("def"), to!T("a"), 1)));
2230
2231 assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4,
2232 No.caseSensitive) == 4);
2233 assert(indexOfNeither(to!S("def"), to!T("D"), 2,
2234 No.caseSensitive) == 2);
2235 assert(indexOfNeither(to!S("ABca"), to!T("a"), 3,
2236 No.caseSensitive) == -1);
2237 assert(indexOfNeither(to!S("def"), to!T("tzf"), 2,
2238 No.caseSensitive) == -1);
2239 assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5,
2240 No.caseSensitive) == 6);
2241 if (is(S == string))
2242 {
2243 assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
2244 No.caseSensitive) == 3, to!string(indexOfNeither(
2245 to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
2246 }
2247 else
2248 {
2249 assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
2250 No.caseSensitive) == 2, to!string(indexOfNeither(
2251 to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
2252 }
2253 }
2254 }
2255 }
2256 );
2257 }
2258
/**
    Returns the index of the last character in `haystack` that is $(B not)
    an element of `needles`. If every character of `haystack` is an element
    of `needles`, or `haystack` is empty, `-1` is returned.

    Params:
        haystack = String to search.
        needles = Set of characters to skip over; the search stops at the
            last character of `haystack` that is not contained in it.
        stopIdx = Index at which the search ends, i.e. only
            $(D haystack[0 .. stopIdx]) is examined. If `stopIdx` is
            greater than or equal to the length of `haystack`, `-1` is
            returned.
        cs = Indicates whether the comparisons are case sensitive.

    Returns:
        The index into `haystack` of the last character that is not an
        element of `needles`, or `-1` if there is no such character.
*/
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
    @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    return indexOfAnyNeitherImpl!(false, false)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive)
    @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // A stop index at or past the end is rejected rather than clamped;
    // note that this includes stopIdx == haystack.length.
    if (stopIdx >= haystack.length)
        return -1;

    // The slice starts at 0, so indices found in it are already valid
    // indices into the full haystack.
    auto searched = haystack[0 .. stopIdx];
    return indexOfAnyNeitherImpl!(false, false)(searched, needles, cs);
}
2294
2295 ///
2296 @safe pure unittest
2297 {
2298 assert(lastIndexOfNeither("abba", "a") == 2);
2299 assert(lastIndexOfNeither("def", "f") == 1);
2300 }
2301
2302 ///
2303 @safe pure unittest
2304 {
2305 assert(lastIndexOfNeither("def", "rsa", 3) == -1);
2306 assert(lastIndexOfNeither("abba", "a", 2) == 1);
2307 }
2308
2309 @safe pure unittest
2310 {
2311 import std.conv : to;
2312
2313 static foreach (S; AliasSeq!(string, wstring, dstring))
2314 {{
2315 auto r = to!S("").lastIndexOfNeither("hello");
2316 assert(r == -1, to!string(r));
2317
2318 r = to!S("hello").lastIndexOfNeither("");
2319 assert(r == 4, to!string(r));
2320
2321 r = to!S("").lastIndexOfNeither("");
2322 assert(r == -1, to!string(r));
2323 }}
2324 }
2325
2326 @safe pure unittest
2327 {
2328 import std.conv : to;
2329 import std.exception : assertCTFEable;
2330
2331 assertCTFEable!(
2332 {
2333 static foreach (S; AliasSeq!(string, wstring, dstring))
2334 {
2335 static foreach (T; AliasSeq!(string, wstring, dstring))
2336 {{
2337 assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1);
2338 assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2);
2339 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);
2340
2341 ptrdiff_t oeIdx = 8;
2342 if (is(S == string))
2343 {
2344 oeIdx = 9;
2345 }
2346
2347 auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"));
2348 assert(foundOeIdx == oeIdx, to!string(foundOeIdx));
2349
2350 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"),
2351 No.caseSensitive) == 5);
2352 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"),
2353 No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"),
2354 to!T("MI6"), No.caseSensitive)));
2355 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"),
2356 No.caseSensitive) == 6, to!string(lastIndexOfNeither(
2357 to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive)));
2358 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"),
2359 No.caseSensitive) == 1);
2360 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"),
2361 No.caseSensitive) == 6);
2362 assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
2363 No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"),
2364 to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive)));
2365 }}
2366 }
2367 }
2368 );
2369 }
2370
2371 @safe pure unittest
2372 {
2373 import std.conv : to;
2374 import std.exception : assertCTFEable;
2375
2376 assertCTFEable!(
2377 {
2378 static foreach (S; AliasSeq!(string, wstring, dstring))
2379 {
2380 static foreach (T; AliasSeq!(string, wstring, dstring))
2381 {{
2382 assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1);
2383 assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1);
2384 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);
2385
2386 ptrdiff_t oeIdx = 4;
2387 if (is(S == string))
2388 {
2389 oeIdx = 5;
2390 }
2391
2392 auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"),
2393 7);
2394 assert(foundOeIdx == oeIdx, to!string(foundOeIdx));
2395
2396 assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6,
2397 No.caseSensitive) == 5);
2398 assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2,
2399 No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"),
2400 to!T("MI6"), 2, No.caseSensitive)));
2401 assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6,
2402 No.caseSensitive) == 5, to!string(lastIndexOfNeither(
2403 to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive)));
2404 assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3,
2405 No.caseSensitive) == 1);
2406 assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2,
2407 No.caseSensitive) == 1, to!string(lastIndexOfNeither(
2408 to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive)));
2409 }}
2410 }
2411 }
2412 );
2413 }
2414
/**
 * Returns the _representation of a string: an array that has the same
 * type qualifiers as the string, but whose element type is the unsigned
 * integer of the same width as the character type (`ubyte` for `char`,
 * `ushort` for `wchar`, `uint` for `dchar`).
 *
 * Params:
 *     s = The string to return the _representation of.
 *
 * Returns:
 *     The passed string, cast to the corresponding integer array type.
 */
auto representation(Char)(Char[] s) @safe pure nothrow @nogc
if (isSomeChar!Char)
{
    import std.traits : ModifyTypePreservingTQ;

    // Maps an (unqualified) character type to the unsigned integer type
    // that has the same size.
    template ToRepType(T)
    {
        static if (T.sizeof == 1)
            alias ToRepType = ubyte;
        else static if (T.sizeof == 2)
            alias ToRepType = ushort;
        else
            alias ToRepType = uint;
    }

    // Re-applies the qualifiers of Char (const, immutable, shared, ...) to
    // the mapped integer type.
    alias Rep = ModifyTypePreservingTQ!(ToRepType, Char);
    return cast(Rep[]) s;
}
2433
2434 ///
2435 @safe pure unittest
2436 {
2437 string s = "hello";
2438 static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
2439 assert(representation(s) is cast(immutable(ubyte)[]) s);
2440 assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
2441 }
2442
2443 @system pure unittest
2444 {
2445 import std.exception : assertCTFEable;
2446 import std.traits : Fields;
2447 import std.typecons : Tuple;
2448
2449 assertCTFEable!(
2450 {
2451 void test(Char, T)(Char[] str)
2452 {
2453 static assert(is(typeof(representation(str)) == T[]));
2454 assert(representation(str) is cast(T[]) str);
2455 }
2456
2457 static foreach (Type; AliasSeq!(Tuple!(char , ubyte ),
2458 Tuple!(wchar, ushort),
2459 Tuple!(dchar, uint )))
2460 {{
2461 alias Char = Fields!Type[0];
2462 alias Int = Fields!Type[1];
2463 enum immutable(Char)[] hello = "hello";
2464
2465 test!( immutable Char, immutable Int)(hello);
2466 test!( const Char, const Int)(hello);
2467 test!( Char, Int)(hello.dup);
2468 test!( shared Char, shared Int)(cast(shared) hello.dup);
2469 test!(const shared Char, const shared Int)(hello);
2470 }}
2471 });
2472 }
2473
2474
/**
 * Capitalize the first character of `input` and convert the rest of
 * `input` to lowercase.
 *
 * Params:
 *     input = The string to _capitalize.
 *
 * Returns:
 *     The capitalized string, as a newly allocated array.
 *
 * See_Also:
 *      $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory
 */
S capitalize(S)(S input) @trusted pure
if (isSomeString!S)
{
    import std.array : array;
    import std.uni : asCapitalized;
    import std.utf : byUTF;

    // Code unit type of the result, so the output uses the same encoding
    // as the input.
    alias CodeUnit = ElementEncodingType!S;

    // asCapitalized is lazy; re-encode it to CodeUnit and materialise it.
    return array(byUTF!CodeUnit(asCapitalized(input)));
}
2497
2498 ///
2499 pure @safe unittest
2500 {
2501 assert(capitalize("hello") == "Hello");
2502 assert(capitalize("World") == "World");
2503 }
2504
// Overload for types that are not strings themselves but have a
// StringTypeOf: forwards to the string overload instantiated with that
// string type.
auto capitalize(S)(auto ref S s)
if (!isSomeString!S && is(StringTypeOf!S))
{
    return capitalize!(StringTypeOf!S)(s);
}
2510
2511 @safe pure unittest
2512 {
2513 assert(testAliasedString!capitalize("hello"));
2514 }
2515
2516 @safe pure unittest
2517 {
2518 import std.algorithm.comparison : cmp;
2519 import std.conv : to;
2520 import std.exception : assertCTFEable;
2521
2522 assertCTFEable!(
2523 {
2524 static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
2525 {{
2526 S s1 = to!S("FoL");
2527 S s2;
2528
2529 s2 = capitalize(s1);
2530 assert(cmp(s2, "Fol") == 0);
2531 assert(s2 !is s1);
2532
2533 s2 = capitalize(s1[0 .. 2]);
2534 assert(cmp(s2, "Fo") == 0);
2535
2536 s1 = to!S("fOl");
2537 s2 = capitalize(s1);
2538 assert(cmp(s2, "Fol") == 0);
2539 assert(s2 !is s1);
2540 s1 = to!S("\u0131 \u0130");
2541 s2 = capitalize(s1);
2542 assert(cmp(s2, "\u0049 i\u0307") == 0);
2543 assert(s2 !is s1);
2544
2545 s1 = to!S("\u017F \u0049");
2546 s2 = capitalize(s1);
2547 assert(cmp(s2, "\u0053 \u0069") == 0);
2548 assert(s2 !is s1);
2549 }}
2550 });
2551 }
2552
/++
    Split `s` into an array of lines according to the unicode standard using
    `'\r'`, `'\n'`, `"\r\n"`, $(REF lineSep, std,uni),
    $(REF paraSep, std,uni), `U+0085` (NEL), `'\v'`  and `'\f'`
    as delimiters. If `keepTerm` is set to `Yes.keepTerminator`, then the
    delimiter is included in the strings returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Allocates memory; use $(LREF lineSplitter) for an alternative that
    does not.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

  Params:
    s = a string of `chars`, `wchars`, or `dchars`, or any custom
        type that casts to a `string` type
    keepTerm = whether delimiter is included or not in the results
  Returns:
    array of strings, each element is a line that is a slice of `s`
  See_Also:
    $(LREF lineSplitter)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 +/
alias KeepTerminator = Flag!"keepTerminator";

/// ditto
C[][] splitLines(C)(C[] s, KeepTerminator keepTerm = No.keepTerminator) @safe pure
if (isSomeChar!C)
{
    import std.array : appender;
    import std.uni : lineSep, paraSep;

    const bool keep = keepTerm == Yes.keepTerminator;
    auto lines = appender!(C[][])();
    size_t lineStart = 0;

    for (size_t pos = 0; pos < s.length; ++pos)
    {
        const unit = s[pos];

        // Number of code units occupied by a line terminator that begins
        // at `pos`; 0 means `pos` is ordinary line content.
        size_t termLen = 0;

        if (unit == '\n' || unit == '\v' || unit == '\f')
        {
            termLen = 1;
        }
        else if (unit == '\r')
        {
            // "\r\n" counts as one terminator, a lone '\r' as well.
            termLen = (pos + 1 < s.length && s[pos + 1] == '\n') ? 2 : 1;
        }
        else
        {
            static if (C.sizeof == 1)
            {
                // UTF-8: the multi-byte terminators are matched on their
                // raw bytes so that invalid UTF never needs decoding.
                if (unit == 0xE2)
                {
                    // lineSep is E2 80 A8, paraSep is E2 80 A9
                    if (pos + 2 < s.length &&
                        s[pos + 1] == 0x80 &&
                        (s[pos + 2] == 0xA8 || s[pos + 2] == 0xA9))
                    {
                        termLen = 3;
                    }
                }
                else if (unit == 0xC2)
                {
                    // NEL is C2 85
                    if (pos + 1 < s.length && s[pos + 1] == 0x85)
                        termLen = 2;
                }
            }
            else
            {
                // UTF-16 / UTF-32: each of these is a single code unit.
                if (unit == lineSep || unit == paraSep || unit == '\u0085')
                    termLen = 1;
            }
        }

        if (termLen == 0)
            continue;

        lines.put(s[lineStart .. pos + (keep ? termLen : 0)]);
        lineStart = pos + termLen;
        // Skip the remaining code units of the terminator; the loop
        // increment accounts for the first one.
        pos += termLen - 1;
    }

    // Trailing text that is not followed by a terminator is a line too.
    if (lineStart != s.length)
        lines.put(s[lineStart .. $]);

    return lines.data;
}
2664
2665 ///
2666 @safe pure nothrow unittest
2667 {
2668 string s = "Hello\nmy\rname\nis";
2669 assert(splitLines(s) == ["Hello", "my", "name", "is"]);
2670 }
2671
2672 @safe pure nothrow unittest
2673 {
2674 string s = "a\xC2\x86b";
2675 assert(splitLines(s) == [s]);
2676 }
2677
2678 @safe pure nothrow unittest
2679 {
2680 assert(testAliasedString!splitLines("hello\nworld"));
2681
2682 enum S : string { a = "hello\nworld" }
2683 assert(S.a.splitLines() == ["hello", "world"]);
2684 }
2685
2686 @system pure nothrow unittest
2687 {
2688 // dip1000 cannot express an array of scope arrays, so this is not @safe
2689 char[11] sa = "hello\nworld";
2690 assert(sa.splitLines() == ["hello", "world"]);
2691 }
2692
2693 @safe pure unittest
2694 {
2695 import std.conv : to;
2696 import std.exception : assertCTFEable;
2697
2698 assertCTFEable!(
2699 {
2700 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
2701 {{
2702 auto s = to!S(
2703 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~
2704 "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
2705 );
2706 auto lines = splitLines(s);
2707 assert(lines.length == 14);
2708 assert(lines[0] == "");
2709 assert(lines[1] == "peter");
2710 assert(lines[2] == "");
2711 assert(lines[3] == "paul");
2712 assert(lines[4] == "jerry");
2713 assert(lines[5] == "ice");
2714 assert(lines[6] == "cream");
2715 assert(lines[7] == "");
2716 assert(lines[8] == "sunday");
2717 assert(lines[9] == "mon\u2030day");
2718 assert(lines[10] == "schadenfreude");
2719 assert(lines[11] == "kindergarten");
2720 assert(lines[12] == "");
2721 assert(lines[13] == "cookies");
2722
2723
2724 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF
2725 auto ulines = splitLines(cast(char[]) u);
2726 assert(cast(ubyte[])(ulines[0]) == u);
2727
2728 lines = splitLines(s, Yes.keepTerminator);
2729 assert(lines.length == 14);
2730 assert(lines[0] == "\r");
2731 assert(lines[1] == "peter\n");
2732 assert(lines[2] == "\r");
2733 assert(lines[3] == "paul\r\n");
2734 assert(lines[4] == "jerry\u2028");
2735 assert(lines[5] == "ice\u2029");
2736 assert(lines[6] == "cream\n");
2737 assert(lines[7] == "\n");
2738 assert(lines[8] == "sunday\n");
2739 assert(lines[9] == "mon\u2030day\n");
2740 assert(lines[10] == "schadenfreude\v");
2741 assert(lines[11] == "kindergarten\f");
2742 assert(lines[12] == "\v");
2743 assert(lines[13] == "cookies\u0085");
2744
2745 s.popBack(); // Lop-off trailing \n
2746 lines = splitLines(s);
2747 assert(lines.length == 14);
2748 assert(lines[9] == "mon\u2030day");
2749
2750 lines = splitLines(s, Yes.keepTerminator);
2751 assert(lines.length == 14);
2752 assert(lines[13] == "cookies");
2753 }}
2754 });
2755 }
2756
// Lazy range of lines over a sliceable character range. Each element is a
// slice of the input; with Yes.keepTerminator the slice includes the line
// terminator that ended it.
private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)
{
    import std.conv : unsigned;
    import std.uni : lineSep, paraSep;
private:
    Range _input;

    alias IndexType = typeof(unsigned(_input.length));
    // Sentinel stored in iStart while the extent of the current line has
    // not been computed yet.
    enum IndexType _unComputed = IndexType.max;
    IndexType iStart = _unComputed; // start of the current line
    IndexType iEnd = 0;             // end of the slice returned by front
    IndexType iNext = 0;            // where the following line starts

    // Returns the number of code units occupied by a line terminator that
    // begins at index `at`, or 0 if there is none at that position.
    IndexType terminatorLength(IndexType at)
    {
        const unit = _input[at];

        if (unit == '\n' || unit == '\v' || unit == '\f')
            return 1;

        if (unit == '\r')
        {
            // "\r\n" counts as one terminator, a lone '\r' as well.
            return (at + 1 < _input.length && _input[at + 1] == '\n') ? 2 : 1;
        }

        static if (unit.sizeof == 1)
        {
            // UTF-8: match the multi-byte terminators on their raw bytes.
            if (unit == 0xE2)
            {
                // lineSep is E2 80 A8, paraSep is E2 80 A9
                if (at + 2 < _input.length &&
                    _input[at + 1] == 0x80 &&
                    (_input[at + 2] == 0xA8 || _input[at + 2] == 0xA9))
                {
                    return 3;
                }
            }
            else if (unit == 0xC2)
            {
                // NEL is C2 85
                if (at + 1 < _input.length && _input[at + 1] == 0x85)
                    return 2;
            }
        }
        else
        {
            // UTF-16 / UTF-32: each of these is a single code unit.
            if (unit == '\u0085' || unit == lineSep || unit == paraSep)
                return 1;
        }

        return 0;
    }

public:
    this(Range input)
    {
        _input = input;
    }

    static if (isInfinite!Range)
    {
        enum bool empty = false;
    }
    else
    {
        // Exhausted once no line is pending and the scan position has
        // reached the end of the input.
        @property bool empty()
        {
            return iStart == _unComputed && iNext == _input.length;
        }
    }

    // Returns the current line. The first call after construction or
    // popFront scans for the end of the line and caches the result in
    // iStart/iEnd/iNext; later calls reuse the cached bounds.
    @property typeof(_input) front()
    {
        if (iStart == _unComputed)
        {
            iStart = iNext;

            IndexType pos = iNext;
            IndexType termLen = 0;
            for (; pos != _input.length; ++pos)
            {
                termLen = terminatorLength(pos);
                if (termLen != 0)
                    break;
            }

            // If the end of input was reached, termLen is 0 and both
            // bounds coincide with the input length.
            iEnd = pos + (keepTerm == Yes.keepTerminator ? termLen : 0);
            iNext = pos + termLen;
        }
        return _input[iStart .. iEnd];
    }

    // Advances to the next line.
    void popFront()
    {
        if (iStart == _unComputed)
        {
            assert(!empty, "Can not popFront an empty range");
            // Evaluated only for its side effect of computing iNext.
            front;
        }
        iStart = _unComputed;
    }

    static if (isForwardRange!Range)
    {
        // Copies the splitter state together with a saved copy of the
        // underlying input.
        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }
}
2888
/***********************************
 *  Split an array or sliceable range of characters into a range of lines
    using `'\r'`, `'\n'`, `'\v'`, `'\f'`, `"\r\n"`,
    $(REF lineSep, std,uni), $(REF paraSep, std,uni) and `'\u0085'` (NEL)
    as delimiters. If `keepTerm` is set to `Yes.keepTerminator`, then the
    delimiter is included in the slices returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Does not allocate memory.

  Params:
    r = array of `chars`, `wchars`, or `dchars` or a sliceable range
    keepTerm = whether delimiter is included or not in the results
  Returns:
    range of slices of the input range `r`

  See_Also:
    $(LREF splitLines)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 */
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r)
if (hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Generic overload for non-string ranges with slicing and length.
    return LineSplitter!(keepTerm, Range)(r);
}

/// Ditto
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, C)(C[] r)
if (isSomeChar!C)
{
    // Array overload: the splitter is instantiated on the slice type C[].
    return LineSplitter!(keepTerm, C[])(r);
}
2926
2927 ///
2928 @safe pure unittest
2929 {
2930 import std.array : array;
2931
2932 string s = "Hello\nmy\rname\nis";
2933
2934 /* notice the call to 'array' to turn the lazy range created by
2935 lineSplitter comparable to the string[] created by splitLines.
2936 */
2937 assert(lineSplitter(s).array == splitLines(s));
2938 }
2939
2940 @safe pure unittest
2941 {
2942 import std.array : array;
2943 import std.conv : to;
2944 import std.exception : assertCTFEable;
2945
2946 assertCTFEable!(
2947 {
2948 static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
2949 {{
2950 auto s = to!S(
2951 "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~
2952 "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
2953 );
2954
2955 auto lines = lineSplitter(s).array;
2956 assert(lines.length == 14);
2957 assert(lines[0] == "");
2958 assert(lines[1] == "peter");
2959 assert(lines[2] == "");
2960 assert(lines[3] == "paul");
2961 assert(lines[4] == "jerry");
2962 assert(lines[5] == "ice");
2963 assert(lines[6] == "cream");
2964 assert(lines[7] == "");
2965 assert(lines[8] == "sunday");
2966 assert(lines[9] == "mon\u2030day");
2967 assert(lines[10] == "schadenfreude");
2968 assert(lines[11] == "kindergarten");
2969 assert(lines[12] == "");
2970 assert(lines[13] == "cookies");
2971
2972
2973 ubyte[] u = ['a', 0xFF, 0x12, 'b']; // invalid UTF
2974 auto ulines = lineSplitter(cast(char[]) u).array;
2975 assert(cast(ubyte[])(ulines[0]) == u);
2976
2977 lines = lineSplitter!(Yes.keepTerminator)(s).array;
2978 assert(lines.length == 14);
2979 assert(lines[0] == "\r");
2980 assert(lines[1] == "peter\n");
2981 assert(lines[2] == "\r");
2982 assert(lines[3] == "paul\r\n");
2983 assert(lines[4] == "jerry\u2028");
2984 assert(lines[5] == "ice\u2029");
2985 assert(lines[6] == "cream\n");
2986 assert(lines[7] == "\n");
2987 assert(lines[8] == "sunday\n");
2988 assert(lines[9] == "mon\u2030day\n");
2989 assert(lines[10] == "schadenfreude\v");
2990 assert(lines[11] == "kindergarten\f");
2991 assert(lines[12] == "\v");
2992 assert(lines[13] == "cookies\u0085");
2993
2994 s.popBack(); // Lop-off trailing \n
2995 lines = lineSplitter(s).array;
2996 assert(lines.length == 14);
2997 assert(lines[9] == "mon\u2030day");
2998
2999 lines = lineSplitter!(Yes.keepTerminator)(s).array;
3000 assert(lines.length == 14);
3001 assert(lines[13] == "cookies");
3002 }}
3003 });
3004 }
3005
3006 ///
3007 @nogc @safe pure unittest
3008 {
3009 auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n";
3010 auto lines = s.lineSplitter();
3011 static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"];
3012 uint i;
foreach(line;lines)3013 foreach (line; lines)
3014 {
3015 assert(line == witness[i++]);
3016 }
3017 assert(i == witness.length);
3018 }
3019
3020 @nogc @safe pure unittest
3021 {
3022 import std.algorithm.comparison : equal;
3023 import std.range : only;
3024
3025 auto s = "std/string.d";
3026 auto as = TestAliasedString(s);
3027 assert(equal(s.lineSplitter(), as.lineSplitter()));
3028
3029 enum S : string { a = "hello\nworld" }
3030 assert(equal(S.a.lineSplitter(), only("hello", "world")));
3031
3032 char[S.a.length] sa = S.a[];
3033 assert(equal(sa.lineSplitter(), only("hello", "world")));
3034 }
3035
3036 @safe pure unittest
3037 {
3038 auto s = "line1\nline2";
3039 auto spl0 = s.lineSplitter!(Yes.keepTerminator);
3040 auto spl1 = spl0.save;
3041 spl0.popFront;
3042 assert(spl1.front ~ spl0.front == s);
3043 string r = "a\xC2\x86b";
3044 assert(r.lineSplitter.front == r);
3045 }
3046
/++
    Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or
    as specified in the second argument.

    Params:
        input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        chars = string of characters to be stripped

    Returns: `input` stripped of leading whitespace or characters
    specified in the second argument.

    Postconditions: `input` and the returned value
    will share the same tail (see $(REF sameTail, std,array)).

    See_Also:
        Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation)
  +/
auto stripLeft(Range)(Range input)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isInfinite!Range && !isConvertibleToString!Range)
{
    import std.traits : isDynamicArray;
    static import std.ascii;
    static import std.uni;

    alias Unit = ElementEncodingType!Range;
    enum bool wideUnits = is(immutable Unit == immutable dchar)
        || is(immutable Unit == immutable wchar);

    static if (wideUnits)
    {
        // Decoding is never needed for dchar. It happens not to be needed
        // here for wchar because no whitespace is outside the basic
        // multilingual plane meaning every whitespace character is encoded
        // with a single wchar and due to the design of UTF-16 those wchars
        // will not occur as part of the encoding of multi-wchar codepoints.
        static if (isDynamicArray!Range)
        {
            // Index of the first code unit that is kept.
            size_t firstKept = 0;
            while (firstKept < input.length && std.uni.isWhite(input[firstKept]))
                ++firstKept;
            return input[firstKept .. $];
        }
        else
        {
            for (; !input.empty; input.popFront())
            {
                if (!std.uni.isWhite(input.front))
                    break;
            }
            return input;
        }
    }
    else
    {
        static if (isDynamicArray!Range)
        {
            // ASCII optimization for dynamic arrays: consume leading ASCII
            // whitespace without decoding. Only when a non-ASCII code unit
            // is met before any other character does the decoding path
            // below have to take over.
            size_t asciiRun = 0;
            bool hitNonAscii = false;
            while (asciiRun < input.length)
            {
                const unit = input[asciiRun];
                if (unit >= 0x80)
                {
                    hitNonAscii = true;
                    break;
                }
                if (!std.ascii.isWhite(unit))
                    break;
                ++asciiRun;
            }
            input = input[asciiRun .. $];
            if (!hitNonAscii)
                return input;
        }

        import std.utf : decode, decodeFront, UseReplacementDchar;

        static if (isNarrowString!Range)
        {
            size_t next = 0;
            while (next < input.length)
            {
                // Remember where this code point starts; decode advances
                // `next` past it.
                const candidate = next;
                const dchar codePoint = decode!(UseReplacementDchar.yes)(input, next);
                if (!std.uni.isWhite(codePoint))
                    return input[candidate .. $];
            }
            return input[$ .. $];
        }
        else
        {
            while (!input.empty)
            {
                auto unit = input.front;
                if (std.ascii.isASCII(unit))
                {
                    if (!std.ascii.isWhite(unit))
                        break;
                    input.popFront();
                }
                else
                {
                    // decodeFront consumes the code point, so keep a copy
                    // positioned before it in case it must be kept.
                    auto checkpoint = input.save;
                    auto codePoint = decodeFront!(UseReplacementDchar.yes)(input);
                    if (!std.uni.isWhite(codePoint))
                        return checkpoint;
                }
            }
            return input;
        }
    }
}
3156
3157 ///
3158 nothrow @safe pure unittest
3159 {
3160 import std.uni : lineSep, paraSep;
3161 assert(stripLeft(" hello world ") ==
3162 "hello world ");
3163 assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
3164 "hello world\n\t\v\r");
3165 assert(stripLeft(" \u2028hello world") ==
3166 "hello world");
3167 assert(stripLeft("hello world") ==
3168 "hello world");
3169 assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
3170 "hello world" ~ [lineSep]);
3171 assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
3172 "hello world" ~ [paraSep]);
3173
3174 import std.array : array;
3175 import std.utf : byChar;
3176 assert(stripLeft(" hello world "w.byChar).array ==
3177 "hello world ");
3178 assert(stripLeft(" \u2022hello world ".byChar).array ==
3179 "\u2022hello world ");
3180 }
3181
// Overload for types accepted by isConvertibleToString: forwards to the
// overload instantiated with the type's StringTypeOf.
auto stripLeft(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    return stripLeft!(StringTypeOf!Range)(str);
}
3187
3188 @nogc nothrow @safe pure unittest
3189 {
3190 assert(testAliasedString!stripLeft(" hello"));
3191 }
3192
/// Ditto
auto stripLeft(Range, Char)(Range input, const(Char)[] chars)
if (((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        // Convertible types are handled by the overload for their
        // underlying string type.
        return stripLeft!(StringTypeOf!Range)(input, chars);
    }
    else
    {
        // Drop front elements for as long as they occur in `chars`.
        while (!input.empty && chars.indexOf(input.front) != -1)
            input.popFront;
        return input;
    }
}
3210
3211 ///
3212 @safe pure unittest
3213 {
3214 assert(stripLeft(" hello world ", " ") ==
3215 "hello world ");
3216 assert(stripLeft("xxxxxhello world ", "x") ==
3217 "hello world ");
3218 assert(stripLeft("xxxyy hello world ", "xy ") ==
3219 "hello world ");
3220 }
3221
3222 ///
3223 @safe pure unittest
3224 {
3225 import std.array : array;
3226 import std.utf : byChar, byWchar, byDchar;
3227
3228 assert(stripLeft(" xxxyy hello world "w.byChar, "xy ").array ==
3229 "hello world ");
3230
3231 assert(stripLeft("\u2028\u2020hello world\u2028"w.byWchar,
3232 "\u2028").array == "\u2020hello world\u2028");
3233 assert(stripLeft("\U00010001hello world"w.byWchar, " ").array ==
3234 "\U00010001hello world"w);
3235 assert(stripLeft("\U00010001 xyhello world"d.byDchar,
3236 "\U00010001 xy").array == "hello world"d);
3237
3238 assert(stripLeft("\u2020hello"w, "\u2020"w) == "hello"w);
3239 assert(stripLeft("\U00010001hello"d, "\U00010001"d) == "hello"d);
3240 assert(stripLeft(" hello ", "") == " hello ");
3241 }
3242
3243 @safe pure unittest
3244 {
3245 assert(testAliasedString!stripLeft(" xyz hello", "xyz "));
3246 }
3247
/++
    Removes trailing whitespace (as defined by $(REF isWhite, std,uni)), or
    the trailing characters listed in the second argument, from `str`.

    Params:
        str = string or random access range of characters
        chars = string of characters to be stripped

    Returns:
        slice of `str` with the trailing whitespace, or the trailing
        characters given in the second argument, removed.

    See_Also:
        Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
+/
auto stripRight(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    import std.traits : isDynamicArray;
    import std.uni : isWhite;
    alias C = Unqual!(ElementEncodingType!(typeof(str)));

    static if (isSomeString!(typeof(str)) && C.sizeof >= 2)
    {
        // Every whitespace character fits in a single wchar, and UTF-16 never
        // reuses those values inside a surrogate pair, so the code units can
        // be inspected one by one from the back.
        size_t keep = str.length;
        while (keep > 0 && isWhite(str[keep - 1]))
            --keep;
        return str[0 .. keep];
    }
    else
    {
        // Fast path for dynamic arrays: handle a purely ASCII tail without
        // touching the UTF-8 decoding logic below.
        static if (isDynamicArray!(typeof(str)))
        {
            static import std.ascii;
            for (size_t keep = str.length; keep > 0; --keep)
            {
                const C unit = str[keep - 1];
                if (unit >= 0x80)
                {
                    // Non-ASCII unit: hand the remaining prefix to the general loop.
                    str = str[0 .. keep];
                    goto NonAsciiPath;
                }
                if (!std.ascii.isWhite(unit))
                    return str[0 .. keep];
            }
            return str[0 .. 0];
        }

    NonAsciiPath:

        // `i` indexes the unit under inspection. When the loop runs off the
        // front, `i` wraps around so that `i + 1` is 0 and the result is empty.
        size_t i = str.length;
        while (i--)
        {
            static if (C.sizeof >= 2)
            {
                // Same single-unit argument as for wide strings above.
                if (!isWhite(str[i]))
                    break;
            }
            else static if (C.sizeof == 1)
            {
                const last = str[i];
                if (last <= 0x7F)
                {
                    if (!isWhite(last))
                        break;
                    continue;
                }

                // A multi-byte sequence must end in a continuation byte and
                // have at least one byte in front of it.
                if (i == 0 || (last & 0b1100_0000) != 0b1000_0000)
                    break;
                const uint lowBits = last & 0b0011_1111;
                const prev = str[i - 1];

                if ((prev & 0b1110_0000) == 0b1100_0000) // 2 byte encoding.
                {
                    if (!isWhite(lowBits + (uint(prev & 0b0001_1111) << 6)))
                        break;
                    i--;
                    continue;
                }

                if (i == 1 || (prev & 0b1100_0000) != 0b1000_0000)
                    break;
                const lead = str[i - 2];

                // In UTF-8 all whitespace is encoded in 3 bytes or fewer.
                if ((lead & 0b1111_0000) != 0b1110_0000 ||
                    !isWhite(lowBits + (uint(prev & 0b0011_1111) << 6) + (uint(lead & 0b0000_1111) << 12)))
                    break;
                i -= 2;
            }
            else
                static assert(0);
        }

        return str[0 .. i + 1];
    }
}
3364
3365 ///
nothrow @safe pure unittest
{
    import std.uni : lineSep, paraSep;

    // ASCII whitespace at the end is removed; leading whitespace is kept.
    assert(" hello world ".stripRight() == " hello world");
    assert("\n\t\v\rhello world\n\t\v\r".stripRight() == "\n\t\v\rhello world");
    assert("hello world".stripRight() == "hello world");

    // Unicode line and paragraph separators count as whitespace too.
    assert(([lineSep] ~ "hello world" ~ lineSep).stripRight() ==
           [lineSep] ~ "hello world");
    assert(([paraSep] ~ "hello world" ~ paraSep).stripRight() ==
           [paraSep] ~ "hello world");
}
3381
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
auto stripRight(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return stripRight!S(str);
}
3387
@nogc nothrow @safe pure unittest
{
    // Run the one-argument stripRight through the module's aliased-string helper.
    immutable ok = testAliasedString!stripRight("hello ");
    assert(ok);
}
3392
@safe pure unittest
{
    import std.array : array;
    import std.uni : lineSep, paraSep;
    import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings;

    // Code unit ranges of every width.
    assert(" hello world ".byChar.stripRight().array == " hello world");
    assert("\n\t\v\rhello world\n\t\v\r"w.byWchar.stripRight().array == "\n\t\v\rhello world"w);
    assert("hello world"d.byDchar.stripRight().array == "hello world"d);
    assert("\u2028hello world\u2020\u2028".byChar.stripRight().array == "\u2028hello world\u2020");
    assert("hello world\U00010001"w.byWchar.stripRight().array == "hello world\U00010001"w);

    // Invalid input only has to be processed; the result is discarded.
    static foreach (C; AliasSeq!(char, wchar, dchar))
    {
        foreach (s; invalidUTFstrings!C())
        {
            cast(void) s.byUTF!C().stripRight().array;
        }
    }

    cast(void) "a\x80".byUTF!char().stripRight().array;
    wstring ws = ['a', cast(wchar) 0xDC00];
    cast(void) ws.byUTF!wchar().stripRight().array;
}
3416
3417 /// Ditto
auto stripRight(Range, Char)(Range str, const(Char)[] chars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        // Re-dispatch on the underlying string type.
        return stripRight!(StringTypeOf!Range)(str, chars);
    }
    else
    {
        // Drop elements from the back for as long as they occur in `chars`.
        while (!str.empty && chars.indexOf(str.back) != -1)
            str.popBack();
        return str;
    }
}
3434
3435 ///
@safe pure unittest
{
    // Only trailing characters listed in the second argument are removed.
    assert(" hello world ".stripRight("x") == " hello world ");
    assert(" hello world ".stripRight(" ") == " hello world");
    assert(" hello worldxy ".stripRight("xy ") == " hello world");
}
3446
@safe pure unittest
{
    // Run the two-argument stripRight through the module's aliased-string helper.
    immutable ok = testAliasedString!stripRight("hello xyz ", "xyz ");
    assert(ok);
}
3451
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byDchar, byUTF, byWchar;

    // Character ranges produced by the std.utf adapters.
    assert(" hello world xyz ".byChar.stripRight("xyz ").array ==
           " hello world");
    assert("\u2028hello world\u2020\u2028"w.byWchar.stripRight("\u2028").array ==
           "\u2028hello world\u2020");
    assert("hello world\U00010001"w.byWchar.stripRight(" ").array ==
           "hello world\U00010001"w);
    assert("hello world\U00010001 xy"d.byDchar.stripRight("\U00010001 xy").array ==
           "hello world"d);

    // Plain wide strings, and an empty set of characters to strip.
    assert("hello\u2020"w.stripRight("\u2020"w) == "hello"w);
    assert("hello\U00010001"d.stripRight("\U00010001"d) == "hello"d);
    assert(" hello ".stripRight("") == " hello ");
}
3469
3470
/++
    Removes both leading and trailing whitespace (as defined by
    $(REF isWhite, std,uni)), or the characters given in the additional
    arguments, from `str`.

    Params:
        str = string or random access range of characters
        chars = string of characters to be stripped
        leftChars = string of leading characters to be stripped
        rightChars = string of trailing characters to be stripped

    Returns:
        slice of `str` with the leading and trailing whitespace, or the
        characters specified in the additional arguments, removed.

    See_Also:
        Generic stripping on ranges: $(REF _strip, std, algorithm, mutation)
+/
auto strip(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    // Trim the front first, then the back of what is left.
    return str.stripLeft().stripRight();
}
3496
3497 ///
@safe pure unittest
{
    import std.uni : lineSep, paraSep;

    // ASCII whitespace on both sides.
    assert(" hello world ".strip() == "hello world");
    assert("\n\t\v\rhello world\n\t\v\r".strip() == "hello world");
    assert("hello world".strip() == "hello world");

    // Unicode line and paragraph separators on both sides.
    assert(([lineSep] ~ "hello world" ~ [lineSep]).strip() ==
           "hello world");
    assert(([paraSep] ~ "hello world" ~ [paraSep]).strip() ==
           "hello world");
}
3512
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
auto strip(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return strip!S(str);
}
3518
@safe pure unittest
{
    // Run the one-argument strip through the module's aliased-string helper.
    immutable ok = testAliasedString!strip(" hello world ");
    assert(ok);
}
3523
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const char[], string,
                          wchar[], const wchar[], wstring,
                          dchar[], const dchar[], dstring))
    {
        // stripLeft: only the front is trimmed.
        assert(stripLeft(" foo\t ".to!S).equal("foo\t "));
        assert(stripLeft("\u2008 foo\t \u2007".to!S).equal("foo\t \u2007"));
        assert(stripLeft("\u0085 μ \u0085 \u00BB \r".to!S).equal("μ \u0085 \u00BB \r"));
        assert(stripLeft("1".to!S).equal("1"));
        assert(stripLeft("\U0010FFFE".to!S).equal("\U0010FFFE"));
        assert(stripLeft("".to!S).equal(""));

        // stripRight: only the back is trimmed.
        assert(stripRight(" foo\t ".to!S).equal(" foo"));
        assert(stripRight("\u2008 foo\t \u2007".to!S).equal("\u2008 foo"));
        assert(stripRight("\u0085 μ \u0085 \u00BB \r".to!S).equal("\u0085 μ \u0085 \u00BB"));
        assert(stripRight("1".to!S).equal("1"));
        assert(stripRight("\U0010FFFE".to!S).equal("\U0010FFFE"));
        assert(stripRight("".to!S).equal(""));

        // strip: both ends are trimmed.
        assert(strip(" foo\t ".to!S).equal("foo"));
        assert(strip("\u2008 foo\t \u2007".to!S).equal("foo"));
        assert(strip("\u0085 μ \u0085 \u00BB \r".to!S).equal("μ \u0085 \u00BB"));
        assert(strip("\U0010FFFE".to!S).equal("\U0010FFFE"));
        assert(strip("".to!S).equal(""));
    }
    });
}
3558
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;

    // The stripped results must still be slices of the original string.
    assertCTFEable!(
    {
    wstring s = " ";
    assert(sameTail(s, stripLeft(s)));
    assert(sameHead(s, stripRight(s)));
    });
}
3570
3571 /// Ditto
auto strip(Range, Char)(Range str, const(Char)[] chars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        // Re-dispatch on the underlying string type.
        return strip!(StringTypeOf!Range)(str, chars);
    }
    else
    {
        // Trim the front, then the back, using the same character set.
        auto frontTrimmed = stripLeft(str, chars);
        return stripRight(frontTrimmed, chars);
    }
}
3581
3582 ///
@safe pure unittest
{
    // The same character set is applied to both ends.
    assert(" hello world ".strip("x") == " hello world ");
    assert(" hello world ".strip(" ") == "hello world");
    assert(" xyxyhello worldxyxy ".strip("xy ") == "hello world");

    // Wide strings, and an empty set of characters to strip.
    assert("\u2020hello\u2020"w.strip("\u2020"w) == "hello"w);
    assert("\U00010001hello\U00010001"d.strip("\U00010001"d) == "hello"d);
    assert(" hello ".strip("") == " hello ");
}
3595
@safe pure unittest
{
    // Run the two-argument strip through the module's aliased-string helper.
    immutable ok = testAliasedString!strip(" xyz hello world xyz ", "xyz ");
    assert(ok);
}
3600
3601 /// Ditto
auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        // Re-dispatch on the underlying string type.
        return strip!(StringTypeOf!Range)(str, leftChars, rightChars);
    }
    else
    {
        // Each end is trimmed with its own character set.
        auto frontTrimmed = stripLeft(str, leftChars);
        return stripRight(frontTrimmed, rightChars);
    }
}
3611
3612 ///
@safe pure unittest
{
    // Separate character sets for the leading and the trailing end.
    assert("xxhelloyy".strip("x", "y") == "hello");
    assert(" xyxyhello worldxyxyzz ".strip("xy ", "xyz ") ==
           "hello world");
    assert("\u2020hello\u2028"w.strip("\u2020"w, "\u2028"w) == "hello"w);
    assert("\U00010001hello\U00010002"d.strip("\U00010001"d, "\U00010002"d) ==
           "hello"d);
    assert(" hello ".strip("", "") == " hello ");
}
3623
@safe pure unittest
{
    // Run the three-argument strip through the module's aliased-string helper.
    immutable ok = testAliasedString!strip(" xy hello world pq ", "xy ", "pq ");
    assert(ok);
}
3628
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const char[], string,
                          wchar[], const wchar[], wstring,
                          dchar[], const dchar[], dstring))
    {
        // stripLeft with an explicit character set.
        assert(stripLeft(" \tfoo\t ".to!S, "\t ").equal("foo\t "));
        assert(stripLeft("\u2008 foo\t \u2007".to!S, "\u2008 ")
               .equal("foo\t \u2007"));
        assert(stripLeft("\u0085 μ \u0085 \u00BB \r".to!S, "\u0085 ")
               .equal("μ \u0085 \u00BB \r"));
        assert(stripLeft("1".to!S, " ").equal("1"));
        assert(stripLeft("\U0010FFFE".to!S, " ").equal("\U0010FFFE"));
        assert(stripLeft("".to!S, " ").equal(""));

        // stripRight with an explicit character set.
        assert(stripRight(" foo\t ".to!S, "\t ").equal(" foo"));
        assert(stripRight("\u2008 foo\t \u2007".to!S, "\u2007\t ")
               .equal("\u2008 foo"));
        assert(stripRight("\u0085 μ \u0085 \u00BB \r".to!S, "\r ")
               .equal("\u0085 μ \u0085 \u00BB"));
        assert(stripRight("1".to!S, " ").equal("1"));
        assert(stripRight("\U0010FFFE".to!S, " ").equal("\U0010FFFE"));
        assert(stripRight("".to!S, " ").equal(""));

        // strip with one character set for both ends.
        assert(strip(" foo\t ".to!S, "\t ").equal("foo"));
        assert(strip("\u2008 foo\t \u2007".to!S, "\u2008\u2007\t ")
               .equal("foo"));
        assert(strip("\u0085 μ \u0085 \u00BB \r".to!S, "\u0085\r ")
               .equal("μ \u0085 \u00BB"));
        assert(strip("\U0010FFFE".to!S, " ").equal("\U0010FFFE"));
        assert(strip("".to!S, " ").equal(""));

        // strip with separate character sets for each end.
        assert(strip(" \nfoo\t ".to!S, "\n ", "\t ").equal("foo"));
        assert(strip("\u2008\n foo\t \u2007".to!S,
                     "\u2008\n ", "\u2007\t ").equal("foo"));
        assert(strip("\u0085 μ \u0085 \u00BB μ \u00BB\r".to!S,
                     "\u0085 ", "\u00BB\r ").equal("μ \u0085 \u00BB μ"));
        assert(strip("\U0010FFFE".to!S, " ", " ").equal("\U0010FFFE"));
        assert(strip("".to!S, " ", " ").equal(""));
    }
    });
}
3677
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;

    // The stripped results must still be slices of the original string.
    assertCTFEable!(
    {
    wstring s = " xyz ";
    assert(sameTail(s, stripLeft(s, " ")));
    assert(sameHead(s, stripRight(s, " ")));
    });
}
3689
3690
/++
    If `str` ends with `delimiter`, then `str` is returned without
    `delimiter` on its end. If `str` does $(I not) end with
    `delimiter`, then it is returned unchanged.

    If no `delimiter` is given, then one trailing `'\r'`, `'\n'`,
    `"\r\n"`, `'\f'`, `'\v'`, $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni)
    is removed from the end of `str`. If `str` does not end with any of those characters,
    then it is returned unchanged.

    Params:
        str = string or indexable range of characters
        delimiter = string of characters to be sliced off end of str[]

    Returns:
        slice of str
+/
Range chomp(Range)(Range str)
if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;

    alias C = ElementEncodingType!Range;

    if (str.empty)
        return str;

    const last = str[$ - 1];

    if (last == '\n')
    {
        // "\r\n" is removed as a single line terminator.
        immutable size_t cut = (str.length > 1 && str[$ - 2] == '\r') ? 2 : 1;
        return str[0 .. $ - cut];
    }

    if (last == '\r' || last == '\v' || last == '\f')
        return str[0 .. $ - 1];

    // Pop off the last character if lineSep, paraSep, or nelSep
    static if (is(C : const char))
    {
        if (last == 0xA8 || last == 0xA9)
        {
            /* Manually decode:
             *  lineSep is E2 80 A8
             *  paraSep is E2 80 A9
             */
            if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2)
                return str[0 .. $ - 3];
        }
        else if (last == 0x85)
        {
            /* Manually decode:
             *  NEL is C2 85
             */
            if (str.length > 1 && str[$ - 2] == 0xC2)
                return str[0 .. $ - 2];
        }
    }
    else
    {
        // Wider code units hold each of these separators in one element.
        if (last == lineSep || last == paraSep || last == nelSep)
            return str[0 .. $ - 1];
    }

    return str;
}
3762
3763 /// Ditto
Range chomp(Range, C2)(Range str, const(C2)[] delimiter)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
     isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    // An empty delimiter means "remove one trailing line terminator".
    if (delimiter.empty)
        return chomp(str);

    alias C1 = ElementEncodingType!Range;

    static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4)))
    {
        // Same code unit type: compare directly and slice the suffix off.
        import std.algorithm.searching : endsWith;
        if (!str.endsWith(delimiter))
            return str;
        return str[0 .. $ - delimiter.length];
    }
    else
    {
        // Kept so the input can be returned unchanged on a mismatch.
        auto untouched = str.save;

        static if (isSomeString!Range)
            alias Elem = dchar; // because strings auto-decode
        else
            alias Elem = C1;    // and ranges do not

        // Walk the delimiter backwards, consuming matching elements from
        // the back of `str`.
        foreach_reverse (Elem expected; delimiter)
        {
            if (!str.empty && str.back == expected)
            {
                str.popBack();
                continue;
            }
            return untouched;
        }

        return str;
    }
}
3802
3803 ///
@safe pure unittest
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : decode;

    // Without a delimiter a single trailing line terminator is removed.
    assert(" hello world \n\r".chomp() == " hello world \n");
    assert(" hello world \r\n".chomp() == " hello world ");
    assert(" hello world \f".chomp() == " hello world ");
    assert(" hello world \v".chomp() == " hello world ");
    assert(" hello world \n\n".chomp() == " hello world \n");
    assert(" hello world \n\n ".chomp() == " hello world \n\n ");
    assert((" hello world \n\n" ~ [lineSep]).chomp() == " hello world \n\n");
    assert((" hello world \n\n" ~ [paraSep]).chomp() == " hello world \n\n");
    assert((" hello world \n\n" ~ [ nelSep]).chomp() == " hello world \n\n");
    assert(" hello world".chomp() == " hello world");
    assert("".chomp() == "");

    // With a delimiter only that exact suffix is removed.
    assert(" hello world".chomp("orld") == " hello w");
    assert(" hello world".chomp(" he") == " hello world");
    assert("".chomp("hello") == "");

    // Don't decode pointlessly
    assert("hello\xFE".chomp("\r") == "hello\xFE");
}
3828
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
StringTypeOf!Range chomp(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chomp!S(str);
}
3834
// Delimiter overload for types that implicitly convert to a string: forwards
// to the overload instantiated with the underlying string type.
StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chomp!(S, C2)(str, delimiter);
}
3840
@safe pure unittest
{
    // Run both chomp overloads through the module's aliased-string helper.
    immutable plainOk = testAliasedString!chomp(" hello world \n\r");
    assert(plainOk);
    immutable delimOk = testAliasedString!chomp(" hello world", "orld");
    assert(delimOk);
}
3846
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        // @@@ BUG IN COMPILER, MUST INSERT CAST
        assert(chomp(cast(S) null) is null);
        assert(to!S("hello").chomp() == "hello");
        assert(to!S("hello\n").chomp() == "hello");
        assert(to!S("hello\r").chomp() == "hello");
        assert(to!S("hello\r\n").chomp() == "hello");
        assert(to!S("hello\n\r").chomp() == "hello\n");
        assert(to!S("hello\n\n").chomp() == "hello\n");
        assert(to!S("hello\r\r").chomp() == "hello\r");
        assert(to!S("hello\nxxx\n").chomp() == "hello\nxxx");
        assert(to!S("hello\u2028").chomp() == "hello");
        assert(to!S("hello\u2029").chomp() == "hello");
        assert(to!S("hello\u0085").chomp() == "hello");
        assert(to!S("hello\u2028\u2028").chomp() == "hello\u2028");
        assert(to!S("hello\u2029\u2029").chomp() == "hello\u2029");
        assert(to!S("hello\u2029\u2129").chomp() == "hello\u2029\u2129");
        assert(to!S("hello\u2029\u0185").chomp() == "hello\u2029\u0185");

        // Every combination of string type and delimiter type.
        static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(chomp(cast(S) null, cast(T) null) is null);
            assert(to!S("hello\n").chomp(cast(T) null) == "hello");
            assert(to!S("hello").chomp(to!T("o")) == "hell");
            assert(to!S("hello").chomp(to!T("p")) == "hello");
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(to!S("hello").chomp(cast(T) null) == "hello");
            assert(to!S("hello").chomp(to!T("llo")) == "he");
            assert(to!S("\uFF28ello").chomp(to!T("llo")) == "\uFF28e");
            assert(to!S("\uFF28el\uFF4co").chomp(to!T("l\uFF4co")) == "\uFF28e");
        }
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;
    assert("hello world\r\n" .byChar .chomp().array == "hello world");
    assert("hello world\r\n"w.byWchar.chomp().array == "hello world"w);
    assert("hello world\r\n"d.byDchar.chomp().array == "hello world"d);

    assert("hello world"d.byDchar.chomp("ld").array == "hello wor"d);

    assert("hello\u2020" .byChar .chomp("\u2020").array == "hello");
    assert("hello\u2020"d.byDchar.chomp("\u2020"d).array == "hello"d);
}
3902
3903
/++
    If `str` starts with `delimiter`, then the part of `str` following
    `delimiter` is returned. If `str` does $(I not) start with
    `delimiter`, then it is returned unchanged.

    Params:
        str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        delimiter = string of characters to be sliced off front of str[]

    Returns:
        slice of str
+/
Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter)
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) ||
     isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    alias C1 = ElementEncodingType!Range;

    static if (is(immutable C1 == immutable C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4)))
    {
        // Same code unit type: compare directly and slice the prefix off.
        import std.algorithm.searching : startsWith;
        if (!str.startsWith(delimiter))
            return str;
        return str[delimiter.length .. $];
    }
    else
    {
        // Kept so the input can be returned unchanged on a mismatch.
        auto untouched = str.save;

        static if (isSomeString!Range)
            alias Elem = dchar; // because strings auto-decode
        else
            alias Elem = C1;    // and ranges do not

        // Consume one element of `str` per matching delimiter element.
        foreach (Elem expected; delimiter)
        {
            if (!str.empty && str.front == expected)
            {
                str.popFront();
                continue;
            }
            return untouched;
        }

        return str;
    }
}
3953
3954 ///
@safe pure unittest
{
    // The delimiter is removed only when it is an exact prefix.
    assert("hello world".chompPrefix("he") == "llo world");
    assert("hello world".chompPrefix("hello w") == "orld");
    assert("hello world".chompPrefix(" world") == "hello world");
    assert("".chompPrefix("hello") == "");
}
3962
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chompPrefix!(S, C2)(str, delimiter);
}
3968
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Every combination of string type and delimiter type.
    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            assert(to!S("abcdefgh").chompPrefix(to!T("abcde")).equal("fgh"));
            assert(to!S("abcde").chompPrefix(to!T("abcdefgh")).equal("abcde"));
            assert(to!S("\uFF28el\uFF4co").chompPrefix(to!T("\uFF28el\uFF4co")).equal(""));
            assert(to!S("\uFF28el\uFF4co").chompPrefix(to!T("\uFF28el")).equal("\uFF4co"));
            assert(to!S("\uFF28el").chompPrefix(to!T("\uFF28el\uFF4co")).equal("\uFF28el"));
        }
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;
    assert("hello world" .byChar .chompPrefix("hello"d).array == " world");
    assert("hello world"w.byWchar.chompPrefix("hello" ).array == " world"w);
    assert("hello world"d.byDchar.chompPrefix("hello"w).array == " world"d);
    assert("hello world"c.byDchar.chompPrefix("hello"w).array == " world"d);

    assert("hello world"d.byDchar.chompPrefix("lx").array == "hello world"d);
    assert("hello world"d.byDchar.chompPrefix("hello world xx").array == "hello world"d);

    assert("\u2020world" .byChar .chompPrefix("\u2020").array == "world");
    assert("\u2020world"d.byDchar.chompPrefix("\u2020"d).array == "world"d);
}
4004
@safe pure unittest
{
    // Run chompPrefix through the module's aliased-string helper.
    immutable ok = testAliasedString!chompPrefix("hello world", "hello");
    assert(ok);
}
4009
/++
    Returns `str` without its last character, if there is one. If `str`
    ends with `"\r\n"`, then both are removed. If `str` is empty, then
    it is returned unchanged.

    For ranges of UTF-8 or UTF-16 code units the whole trailing code point
    is removed, not just its last code unit.

    Params:
        str = string (must be valid UTF)
    Returns:
        slice of str
+/
Range chop(Range)(Range str)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    if (str.empty)
        return str;

    static if (isSomeString!Range)
    {
        // "\r\n" is removed as a unit; otherwise popBack drops one
        // (auto-decoded) character.
        if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        str.popBack();
        return str;
    }
    else
    {
        alias C = Unqual!(ElementEncodingType!Range);
        C c = str.back;
        str.popBack();
        if (c == '\n')
        {
            if (!str.empty && str.back == '\r')
                str.popBack();
            return str;
        }
        // Pop back a dchar, not just a code unit
        static if (C.sizeof == 1)
        {
            // Keep popping while the unit just removed was a UTF-8
            // continuation byte, up to the maximum sequence length.
            int cnt = 1;
            while ((c & 0xC0) == 0x80)
            {
                if (str.empty)
                    break;
                c = str.back;
                str.popBack();
                if (++cnt > 4)
                    break;
            }
        }
        else static if (C.sizeof == 2)
        {
            // The last unit of a surrogate pair is the LOW surrogate
            // (0xDC00 .. 0xDFFF). If that is what was just removed and a
            // high surrogate (0xD800 .. 0xDBFF) precedes it, remove that too
            // so that no dangling half of the pair is left behind.
            if (c >= 0xDC00 && c <= 0xDFFF && !str.empty)
            {
                immutable C lead = str.back;
                if (lead >= 0xD800 && lead <= 0xDBFF)
                    str.popBack();
            }
        }
        else static if (C.sizeof == 4)
        {
            // One code unit is one code point; nothing more to remove.
        }
        else
            static assert(0);
        return str;
    }
}
4077
4078 ///
@safe pure unittest
{
    // One trailing character is removed; "\r\n" counts as one.
    assert("hello world".chop() == "hello worl");
    assert("hello world\n".chop() == "hello world");
    assert("hello world\r".chop() == "hello world");
    assert("hello world\n\r".chop() == "hello world\n");
    assert("hello world\r\n".chop() == "hello world");
    assert("Walter Bright".chop() == "Walter Brigh");
    assert("".chop() == "");
}
4089
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
StringTypeOf!Range chop(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chop!S(str);
}
4095
@safe pure unittest
{
    // Run chop through the module's aliased-string helper.
    immutable ok = testAliasedString!chop("hello world");
    assert(ok);
}
4100
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings;

    // Code unit ranges of every width.
    assert("hello world".byChar.chop().array == "hello worl");
    assert("hello world\n"w.byWchar.chop().array == "hello world"w);
    assert("hello world\r"d.byDchar.chop().array == "hello world"d);
    assert("hello world\n\r".byChar.chop().array == "hello world\n");
    assert("hello world\r\n"w.byWchar.chop().array == "hello world"w);
    assert("Walter Bright"d.byDchar.chop().array == "Walter Brigh"d);
    assert("".byChar.chop().array == "");

    // Multi-unit characters are removed as a whole.
    assert(`ミツバチと科学者` .byCodeUnit.chop().array == "ミツバチと科学");
    assert(`ミツバチと科学者`w.byCodeUnit.chop().array == "ミツバチと科学"w);
    assert(`ミツバチと科学者`d.byCodeUnit.chop().array == "ミツバチと科学"d);

    // Invalid input only has to be iterable after chopping.
    auto narrowSamples = invalidUTFstrings!char();
    foreach (sample; narrowSamples)
    {
        foreach (unit; sample.byCodeUnit.chop())
        {
        }
    }

    auto wideSamples = invalidUTFstrings!wchar();
    foreach (sample; wideSamples)
    {
        foreach (unit; sample.byCodeUnit.chop())
        {
        }
    }
}
4134
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        assert(chop(cast(S) null) is null);
        assert(to!S("hello").chop().equal("hell"));
        assert(to!S("hello\r\n").chop().equal("hello"));
        assert(to!S("hello\n\r").chop().equal("hello\n"));
        assert(to!S("Verité").chop().equal("Verit"));
        assert(to!S(`さいごの果実`).chop().equal("さいごの果"));
        assert(to!S(`ミツバチと科学者`).chop().equal("ミツバチと科学"));
    }
    });
}
4155
4156
/++
    Left justify `s` in a field `width` characters wide. The space in the
    field that `s` does not occupy is filled with `fillChar`, appended after
    `s`.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad end up to `width` characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF leftJustifier), which does not allocate
+/
S leftJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Materialize the lazy justifier into a freshly allocated string.
    return s.leftJustifier(width, fillChar).array;
}
4179
4180 ///
@safe pure unittest
{
    // Padding is appended; strings already wide enough are left as they are.
    assert("hello".leftJustify(7, 'X') == "helloXX");
    assert("hello".leftJustify(2, 'X') == "hello");
    assert("hello".leftJustify(9, 'X') == "helloXXXX");
}
4187
/++
    Left justify `s` in a field `width` characters wide. The space in the
    field that `s` does not occupy is filled with `fillChar`, produced after
    the characters of `s`.

    Params:
        r = string or range of characters
        width = minimum field width
        fillChar = used to pad end up to `width` characters

    Returns:
        a lazy range of the left justified result

    See_Also:
        $(LREF rightJustifier)
+/
auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        // The actual implementation works on whole code points.
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            dchar _fillChar;
            size_t len;         // number of elements popped so far

          public:

            /// Done once `_width` elements were produced and the input is used up.
            @property bool empty()
            {
                return len >= _width && _input.empty;
            }

            /// Next input element, or the fill character once the input ran out.
            @property C front()
            {
                if (_input.empty)
                    return _fillChar;
                return _input.front;
            }

            void popFront()
            {
                ++len;
                if (!_input.empty)
                    _input.popFront();
            }

            static if (isForwardRange!Range)
            {
                @property typeof(this) save() return scope
                {
                    auto copy = this;
                    copy._input = _input.save;
                    return copy;
                }
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (C.sizeof == 2)
    {
        // Decode to dchar, justify, and re-encode as UTF-16.
        import std.utf : byDchar, byWchar;
        return leftJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 1)
    {
        // Decode to dchar, justify, and re-encode as UTF-8.
        import std.utf : byDchar, byChar;
        return leftJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
4266
4267 ///
@safe pure @nogc nothrow unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // The justifier is lazy, so it is compared element by element.
    assert("hello".leftJustifier(2).equal("hello".byChar));
    assert("hello".leftJustifier(7).equal("hello ".byChar));
    assert("hello".leftJustifier(7, 'x').equal("helloxx".byChar));
}
4277
// Overload for types that implicitly convert to a string: forwards to the
// overload instantiated with the underlying string type.
auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return leftJustifier!S(r, width, fillChar);
}
4283
@safe pure unittest
{
    // A saved copy must not advance together with the original range.
    auto justified = leftJustifier("hello", 8);
    justified.popFront();
    auto snapshot = justified.save;
    justified.popFront();
    assert(justified.front == 'l');
    assert(snapshot.front == 'e');
}
4293
@safe pure unittest
{
    // Run leftJustifier through the module's aliased-string helper.
    immutable ok = testAliasedString!leftJustifier("hello", 2);
    assert(ok);
}
4298
/++
    Right justify `s` in a field `width` characters wide. The space in the
    field that `s` does not occupy is filled with `fillChar`, placed in
    front of `s`.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad the front up to `width` characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF rightJustifier), which does not allocate
+/
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Materialize the lazy justifier into a freshly allocated string.
    return s.rightJustifier(width, fillChar).array;
}
4321
4322 ///
@safe pure unittest
{
    // Padding is prepended; strings already wide enough are left as they are.
    assert("hello".rightJustify(7, 'X') == "XXhello");
    assert("hello".rightJustify(2, 'X') == "hello");
    assert("hello".rightJustify(9, 'X') == "XXXXhello");
}
4329
/++
    Right justify `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad the beginning up to `width` characters

    Returns:
        a lazy range of the right justified result

    See_Also:
        $(LREF leftJustifier)
+/

auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    // 1- and 2-byte code units: justify the decoded dchar range, then
    // re-encode the result to the original code unit width.
    static if (C.sizeof == 1)
    {
        import std.utf : byDchar, byChar;
        return rightJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        import std.utf : byDchar, byWchar;
        return rightJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 4)
    {
        static struct Result
        {
          private:
            Range _input;
            // Holds the requested width until initialize() runs; afterwards
            // the same storage (through `nfill`) counts the fill characters
            // still to be emitted.
            size_t _width;
            alias nfill = _width;       // number of fill characters to prepend
            dchar _fillChar;
            bool inited;                // true once nfill has been computed

            // Lazy initialization so constructor is trivial and cannot fail
            void initialize()
            {
                // Replace _width with nfill
                // (use alias instead of union because CTFE cannot deal with unions)
                assert(_width, "width of 0 not allowed");
                static if (hasLength!Range)
                {
                    immutable len = _input.length;
                    nfill = (_width > len) ? _width - len : 0;
                }
                else
                {
                    // Lookahead to see how many fill characters are needed
                    import std.range : take;
                    import std.range.primitives : walkLength;
                    nfill = _width - walkLength(_input.save.take(_width), _width);
                }
                inited = true;
            }

          public:
            this(Range input, size_t width, dchar fillChar) pure nothrow
            {
                _input = input;
                _fillChar = fillChar;
                _width = width;
            }

            @property bool empty()
            {
                return !nfill && _input.empty;
            }

            @property C front()
            {
                if (!nfill)
                    return _input.front;   // fast path
                // nfill is non-zero: it is either the raw width (not yet
                // initialized) or a real count of remaining fill characters.
                if (!inited)
                    initialize();
                return nfill ? _fillChar : _input.front;
            }

            void popFront()
            {
                if (!nfill)
                    _input.popFront();     // fast path
                else
                {
                    if (!inited)
                        initialize();
                    // initialize() may have reduced nfill to 0, in which case
                    // the input itself has to advance.
                    if (nfill)
                        --nfill;
                    else
                        _input.popFront();
                }
            }

            @property typeof(this) save()
            {
                auto ret = this;
                ret._input = _input.save;
                return ret;
            }
        }

        return Result(r, width, fillChar);
    }
    else
        static assert(0, "Invalid character type of " ~ C.stringof);
}
4446
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // Input wider than the field: nothing is prepended.
    assert(rightJustifier("hello", 2).equal("hello".byChar));
    // Five characters in a field of seven: two fill characters come first.
    assert(rightJustifier("hello", 7).equal("  hello".byChar));
    assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar));
}
4457
// Overload for types that merely convert to a string (see isConvertibleToString):
// forward to the range overload using the underlying string type.
auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return rightJustifier!Str(r, width, fillChar);
}
4463
@safe pure unittest
{
    // Runs rightJustifier through the module's aliased-string test helper.
    auto accepted = testAliasedString!rightJustifier("hello", 2);
    assert(accepted);
}
4468
@safe pure unittest
{
    // One fill character precedes the text; a saved copy keeps its position.
    auto wide = "hello"d.rightJustifier(6);
    wide.popFront();
    auto checkpoint = wide.save;
    wide.popFront();
    assert(wide.front == 'e');
    assert(checkpoint.front == 'h');

    // Two fill characters precede the text.
    auto narrow = "hello".rightJustifier(7);
    narrow.popFront();
    assert(narrow.front == ' ');
    narrow.popFront();
    assert(narrow.front == 'h');

    // Width equal to the input length: popFront must advance the input.
    auto exact = "hello"d.rightJustifier(5);
    foreach (_; 0 .. 3)
        exact.popFront();
}
4489
/++
    Center `s` in a field `width` characters wide, using `fillChar` for the
    part of the field that `s` does not occupy.

    Params:
        s = The string to center
        width = Width of the field to center `s` in
        fillChar = The character to use for filling excess space in the field

    Returns:
        The resulting _center-justified string. The returned string is
        GC-allocated. To avoid GC allocation, use $(LREF centerJustifier)
        instead.
+/
S center(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Build the lazy range first, then materialize it into an array.
    auto centered = centerJustifier(s, width, fillChar);
    return centered.array;
}
4511
///
@safe pure unittest
{
    // Field narrower than the input: the string comes back unchanged.
    assert(center("hello", 2, 'X') == "hello");
    // Wider fields are filled on both sides.
    assert(center("hello", 7, 'X') == "XhelloX");
    assert(center("hello", 9, 'X') == "XXhelloXX");
}
4519
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Same expectations for every mutable/immutable string type.
    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        S s = to!S("hello");

        // Field narrower than the text: unchanged.
        assert(leftJustify(s, 2)  == "hello");
        assert(rightJustify(s, 2) == "hello");
        assert(center(s, 2)       == "hello");

        // Two fill characters in total.
        assert(leftJustify(s, 7)  == "hello  ");
        assert(rightJustify(s, 7) == "  hello");
        assert(center(s, 7)       == " hello ");

        // Three fill characters; center puts the odd one on the right.
        assert(leftJustify(s, 8)  == "hello   ");
        assert(rightJustify(s, 8) == "   hello");
        assert(center(s, 8)       == " hello  ");

        assert(leftJustify(s, 8, '\u0100')  == "hello\u0100\u0100\u0100");
        assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
        assert(center(s, 8, '\u0100')       == "\u0100hello\u0100\u0100");

        assert(leftJustify(s, 8, 'ö')  == "helloööö");
        assert(rightJustify(s, 8, 'ö') == "öööhello");
        assert(center(s, 8, 'ö')       == "öhelloöö");
    }}
    });
}
4554
/++
    Center justify `r` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `r` doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad both sides of `r` up to `width` characters; when
                   the fill cannot be split evenly the extra character goes on
                   the right

    Returns:
        a lazy range of the center justified result

    See_Also:
        $(LREF leftJustifier)
        $(LREF rightJustifier)
+/

auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    // 1- and 2-byte code units: justify the decoded dchar range, then
    // re-encode the result to the original code unit width.
    static if (C.sizeof == 1)
    {
        import std.utf : byDchar, byChar;
        return centerJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        import std.utf : byDchar, byWchar;
        return centerJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (C.sizeof == 4)
    {
        import std.range : chain, repeat;
        import std.range.primitives : walkLength;

        // Look ahead at most `width` elements on a saved copy so `r` itself
        // is not consumed.
        auto len = walkLength(r.save, width);
        if (len > width)
            len = width;
        // Integer division rounds down, so any odd fill character ends up
        // in nright.
        const nleft = (width - len) / 2;
        const nright = width - len - nleft;
        return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright));
    }
    else
        static assert(0, "Invalid character type of " ~ C.stringof);
}
4605
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // Input wider than the field: nothing is added.
    assert(centerJustifier("hello", 2).equal("hello".byChar));
    // Three fill characters: one on the left, two on the right.
    assert(centerJustifier("hello", 8).equal(" hello  ".byChar));
    assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar));
}
4616
// Overload for types that merely convert to a string (see isConvertibleToString):
// forward to the range overload using the underlying string type.
auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return centerJustifier!Str(r, width, fillChar);
}
4622
@safe pure unittest
{
    // Runs centerJustifier through the module's aliased-string test helper.
    auto accepted = testAliasedString!centerJustifier("hello", 8);
    assert(accepted);
}
4627
@safe unittest
{
    // Minimal forward range over a dstring, used instead of a built-in string.
    static auto byFwdRange(dstring s)
    {
        static struct FRange
        {
          @safe:
            dstring str;
            this(dstring s) { str = s; }
            @property bool empty() { return str.length == 0; }
            @property dchar front() { return str[0]; }
            void popFront() { str = str[1 .. $]; }
            @property FRange save() { return this; }
        }
        return FRange(s);
    }

    // Width 6: no fill on the left, one on the right.
    auto centered = centerJustifier(byFwdRange("hello"d), 6);
    centered.popFront();
    auto checkpoint = centered.save;
    centered.popFront();
    assert(centered.front == 'l');
    assert(checkpoint.front == 'e');

    // Width 7: one fill character on each side.
    auto padded = "hello".centerJustifier(7);
    padded.popFront();
    assert(padded.front == 'h');
    padded.popFront();
    assert(padded.front == 'e');

    // All six elements (text plus trailing fill) can be popped.
    auto drained = byFwdRange("hello"d).centerJustifier(6);
    foreach (_; 0 .. 6)
        drained.popFront();
}
4666
4667
/++
    Expand every tab character in `s` into the number of spaces needed to
    place the following character at the next tab stop.

    Params:
        s = string
        tabSize = distance between tab stops

    Returns:
        GC allocated string with tabs replaced with spaces
+/
auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
    || __traits(compiles, StringTypeOf!Range))
{
    import std.array : array;

    // Build the lazy range first, then materialize it into an array.
    auto expanded = detabber(s, tabSize);
    return expanded.array;
}
4686
///
@safe pure unittest
{
    // The newline resets the column, so the tab expands to a full 9 spaces.
    assert(detab(" \n\tx", 9) == " \n         x");
}
4692
@safe pure unittest
{
    // String wrapper that converts through `alias this`.
    static struct TestStruct
    {
        string s;
        alias s this;
    }

    // Same wrapper, but with copying disabled.
    static struct TestStruct2
    {
        string s;
        alias s this;
        @disable this(this);
    }

    string s = " \n\tx";
    // The newline resets the column, so the tab expands to a full 9 spaces.
    string cmp = " \n         x";
    auto t = TestStruct(s);
    assert(detab(t, 9) == cmp);
    assert(detab(TestStruct(s), 9) == cmp);
    assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9));
    assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9));
    assert(detab(TestStruct2(s), 9) == cmp);
}
4717
/++
    Replace each tab character in `r` with the number of spaces
    necessary to align the following character at the next tab stop.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with tabs replaced with spaces
+/
auto detabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0, "tabSize must be greater than 0");

    alias C = Unqual!(ElementEncodingType!(Range));

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the tab at the front
        // Current output column. Kept as size_t so it has the same width as
        // the tab arithmetic it takes part in.
        size_t column;
        size_t index;       // code units of the current code point left to emit

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        static if (isInfinite!(Range))
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty()
            {
                return _input.empty && nspaces == 0;
            }
        }

        @property C front()
        {
            // In the middle of expanding a tab.
            if (nspaces)
                return ' ';
            static if (isSomeString!(Range))
                C c = _input[0];    // index directly, no auto-decoding
            else
                C c = _input.front;
            // Code point already accounted for: hand out its code units as-is.
            if (index)
                return c;
            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                // Single code unit, no decoding needed.
                dc = c;
                index = 1;
            }
            else
            {
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }
            switch (dc)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                case '\t':
                    // Distance to the next tab stop; the tab itself becomes
                    // the first of these spaces.
                    nspaces = _tabSize - (column % _tabSize);
                    column += nspaces;
                    c = ' ';
                    break;

                default:
                    ++column;
                    break;
            }
            return c;
        }

        void popFront()
        {
            // Make sure the element at the front has been classified.
            if (!index)
                front;
            if (nspaces)
                --nspaces;
            if (!nspaces)
            {
                static if (isSomeString!(Range))
                   _input = _input[1 .. $];
                else
                    _input.popFront();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }

    return Result(r, tabSize);
}
4839
///
@safe pure unittest
{
    import std.array : array;

    // The newline resets the column, so the tab expands to a full 9 spaces.
    assert(detabber(" \n\tx", 9).array == " \n         x");
}
4847
// Overload for types that merely convert to a string (see isConvertibleToString):
// forward to the range overload using the underlying string type.
auto detabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return detabber!Str(r, tabSize);
}
4853
@safe pure unittest
{
    // Runs detabber through the module's aliased-string test helper.
    auto accepted = testAliasedString!detabber( " ab\t asdf ", 8);
    assert(accepted);
}
4858
@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        // Tabs at columns 5, 10 and 24 expand to 3, 6 and 8 spaces.
        S s = to!S("This \tis\t a fofof\tof list");
        assert(cmp(detab(s), "This    is       a fofof        of list") == 0);

        assert(detab(cast(S) null) is null);
        assert(detab("").empty);
        assert(detab("a") == "a");
        // A tab at column 0 expands to exactly tabSize spaces.
        assert(detab("\t") == "        ");
        assert(detab("\t", 3) == "   ");
        assert(detab("\t", 9) == "         ");
        // The tab sits at column 3, so 5 spaces reach column 8.
        assert(detab( " ab\t asdf ") == " ab      asdf ");
        assert(detab( " \U00010000b\tasdf ") == " \U00010000b     asdf ");
        // Every kind of line break resets the column.
        assert(detab("\r\t", 9) == "\r         ");
        assert(detab("\n\t", 9) == "\n         ");
        assert(detab("\u0085\t", 9) == "\u0085         ");
        assert(detab("\u2028\t", 9) == "\u2028         ");
        assert(detab(" \u2029\t", 9) == " \u2029         ");
    }}
    });
}
4888
///
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar;

    // The paragraph separator resets the column, so the tab expands to 9 spaces.
    assert(detabber(" \u2029\t".byChar, 9).array == " \u2029         ");

    // A saved copy keeps its own position while the source range advances.
    auto r = "hel\tx".byWchar.detabber();
    assert(r.front == 'h');
    auto s = r.save;
    r.popFront();
    r.popFront();
    assert(r.front == 'l');
    assert(s.front == 'h');
}
4904
/++
    Replaces runs of spaces in `s` with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        s = String to convert.
        tabSize = Tab columns are `tabSize` spaces apart.

    Returns:
        GC allocated string with spaces replaced with tabs;
        use $(LREF entabber) to not allocate.

    See_Also:
        $(LREF entabber)
 +/
auto entab(Range)(Range s, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    import std.array : array;

    // Build the lazy range first, then materialize it into an array.
    auto tabbed = entabber(s, tabSize);
    return tabbed.array;
}
4926
///
@safe pure unittest
{
    // Eight leading spaces fill one tab stop; the space before the newline
    // is trailing whitespace and is dropped.
    assert(entab("        x \n") == "\tx\n");
}
4932
// Overload for arguments that are not character ranges themselves but have
// an underlying string type: forward to the range overload with that type.
auto entab(Range)(auto ref Range s, size_t tabSize = 8)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) &&
    is(StringTypeOf!Range))
{
    alias Str = StringTypeOf!Range;
    return entab!Str(s, tabSize);
}
4939
@safe pure unittest
{
    // Runs entab through the module's aliased-string test helper.
    auto accepted = testAliasedString!entab(" x \n");
    assert(accepted);
}
4944
/++
    Replaces spaces in range `r` with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with spaces replaced with tabs

    See_Also:
        $(LREF entab)
 +/
auto entabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0, "tabSize must be greater than 0");
    alias C = Unqual!(ElementEncodingType!Range);

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the current blank run
        size_t ntabs;       // tabs still to emit for the current blank run
        // Current column. Kept as size_t so it has the same width as the
        // tab arithmetic it takes part in.
        size_t column;
        size_t index;       // code units of the current code point left to emit

        @property C getFront()
        {
            static if (isSomeString!Range)
                return _input[0];       // avoid autodecode
            else
                return _input.front;
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        @property bool empty()
        {
            if (ntabs || nspaces)
                return false;

            /* Since trailing spaces are removed,
             * look ahead for anything that is not a trailing space
             */
            static if (isSomeString!Range)
            {
                foreach (c; _input)
                {
                    if (c != ' ' && c != '\t')
                        return false;
                }
                return true;
            }
            else
            {
                if (_input.empty)
                    return true;
                immutable c = _input.front;
                if (c != ' ' && c != '\t')
                    return false;
                // Only scan a saved copy when the front is itself blank.
                auto t = _input.save;
                t.popFront();
                foreach (c2; t)
                {
                    if (c2 != ' ' && c2 != '\t')
                        return false;
                }
                return true;
            }
        }

        @property C front()
        {
            // Pending output for a blank run: tabs first, then spaces.
            if (ntabs)
                return '\t';
            if (nspaces)
                return ' ';
            C c = getFront;
            // Code point already accounted for: hand out its code units as-is.
            if (index)
                return c;
            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                index = 1;
                dc = c;
                if (c == ' ' || c == '\t')
                {
                    // Consume input until a non-blank is encountered
                    immutable startcol = column;
                    C cx;
                    static if (isSomeString!Range)
                    {
                        while (1)
                        {
                            assert(_input.length, "input did not contain non "
                                    ~ "whitespace character");
                            cx = _input[0];
                            if (cx == ' ')
                                ++column;
                            else if (cx == '\t')
                                column += _tabSize - (column % _tabSize);
                            else
                                break;
                            _input = _input[1 .. $];
                        }
                    }
                    else
                    {
                        while (1)
                        {
                            // `empty` is used because the constraint only
                            // requires a forward range, which need not have
                            // a `length`.
                            assert(!_input.empty, "input did not contain non "
                                    ~ "whitespace character");
                            cx = _input.front;
                            if (cx == ' ')
                                ++column;
                            else if (cx == '\t')
                                column += _tabSize - (column % _tabSize);
                            else
                                break;
                            _input.popFront();
                        }
                    }
                    // Compute ntabs+nspaces to get from startcol to column
                    immutable n = column - startcol;
                    if (n == 1)
                    {
                        // A single column is always written as one space.
                        nspaces = 1;
                    }
                    else
                    {
                        ntabs = column / _tabSize - startcol / _tabSize;
                        if (ntabs == 0)
                            nspaces = column - startcol;
                        else
                            nspaces = column % _tabSize;
                    }
                    // Classify the non-blank element that ended the run.
                    if (cx < codeUnitLimit!(immutable(C)[]))
                    {
                        dc = cx;
                        index = 1;
                    }
                    else
                    {
                        auto r = _input.save;
                        dc = decodeFront(r, index);     // lookahead to decode
                    }
                    switch (dc)
                    {
                        case '\r':
                        case '\n':
                        case paraSep:
                        case lineSep:
                        case nelSep:
                            column = 0;
                            // Spaces followed by newline are ignored
                            ntabs = 0;
                            nspaces = 0;
                            return cx;

                        default:
                            ++column;
                            break;
                    }
                    return ntabs ? '\t' : ' ';
                }
            }
            else
            {
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }
            switch (dc)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                default:
                    ++column;
                    break;
            }
            return c;
        }

        void popFront()
        {
            // Make sure the element at the front has been classified.
            if (!index)
                front;
            if (ntabs)
                --ntabs;
            else if (nspaces)
                --nspaces;
            else
            {
                // Nothing pending: advance the underlying input.
                static if (isSomeString!Range)
                   _input = _input[1 .. $];
                else
                    _input.popFront();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }

    return Result(r, tabSize);
}
5179
///
@safe pure unittest
{
    import std.array : array;

    // Eight leading spaces fill one tab stop; the space before the newline
    // is trailing whitespace and is dropped.
    assert(entabber("        x \n").array == "\tx\n");
}
5186
// Overload for types that merely convert to a string (see isConvertibleToString):
// forward to the range overload using the underlying string type.
auto entabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return entabber!Str(r, tabSize);
}
5192
@safe pure unittest
{
    // Runs entabber through the module's aliased-string test helper.
    auto accepted = testAliasedString!entabber(" ab asdf ", 8);
    assert(accepted);
}
5197
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        assert(entab(cast(string) null) is null);
        assert(entab("").empty);
        assert(entab("a") == "a");
        // Blank-only input is all trailing whitespace.
        assert(entab(" ") == "");
        // Eight spaces reach the first tab stop.
        assert(entab("        x") == "\tx");
        // A run ending exactly on a tab stop becomes a single tab ...
        assert(entab("  ab    asdf ") == "  ab\tasdf");
        // ... and one column past it becomes a tab plus a space.
        assert(entab("  ab     asdf ") == "  ab\t asdf");
        assert(entab(" ab \t asdf ") == " ab\t asdf");
        // One to eight spaces before the tab all end at column 16: two tabs.
        assert(entab("1234567 \ta") == "1234567\t\ta");
        assert(entab("1234567  \ta") == "1234567\t\ta");
        assert(entab("1234567   \ta") == "1234567\t\ta");
        assert(entab("1234567    \ta") == "1234567\t\ta");
        assert(entab("1234567     \ta") == "1234567\t\ta");
        assert(entab("1234567      \ta") == "1234567\t\ta");
        assert(entab("1234567       \ta") == "1234567\t\ta");
        assert(entab("1234567        \ta") == "1234567\t\ta");
        // Nine spaces reach column 16 on their own, so the tab adds a third stop.
        assert(entab("1234567         \ta") == "1234567\t\t\ta");

        assert(entab("a ") == "a");
        assert(entab("a\v") == "a\v");
        assert(entab("a\f") == "a\f");
        assert(entab("a\n") == "a\n");
        assert(entab("a\n\r") == "a\n\r");
        assert(entab("a\r\n") == "a\r\n");
        assert(entab("a\u2028") == "a\u2028");
        assert(entab("a\u2029") == "a\u2029");
        assert(entab("a\u0085") == "a\u0085");
        assert(entab("a    ") == "a");
        assert(entab("a\t") == "a");
        assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
                     "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
        // Blanks directly before a line break are dropped.
        assert(entab(" \naa") == "\naa");
        assert(entab(" \r aa") == "\r aa");
        assert(entab(" \u2028 aa") == "\u2028 aa");
        assert(entab(" \u2029 aa") == "\u2029 aa");
        assert(entab(" \u0085 aa") == "\u0085 aa");
    });
}
5244
@safe pure
unittest
{
    import std.array : array;
    import std.utf : byChar;

    // Code-unit ranges: blanks before a line break are dropped, as are
    // trailing blanks.
    assert(entabber(" \u0085 aa".byChar).array == "\u0085 aa");
    assert(entabber(" \u2028\t aa \t".byChar).array == "\u2028\t aa");

    // A saved copy keeps its own position while the source range advances.
    auto digits = entabber("1234", 4);
    digits.popFront();
    auto checkpoint = digits.save;
    digits.popFront();
    assert(digits.front == '3');
    assert(checkpoint.front == '2');
}
5260
5261
/++
    Replaces the characters in `str` which are keys in `transTable` with
    their corresponding values in `transTable`. `transTable` is an AA
    where its keys are `dchar` and its values are either `dchar` or some
    type of string. Also, if `toRemove` is given, the characters in it are
    removed from `str` prior to translation. `str` itself is unaltered.
    A copy with the changes is returned.

    See_Also:
        $(LREF tr),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
  +/
C1[] translate(C1, C2 = immutable char)(C1[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated text in an appender and hand back its contents.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
5291
///
@safe pure unittest
{
    // Replacements are applied once: the '5' produced for 'e' is not
    // translated again to 'q'.
    dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charTable) == "h5ll7 w7rld");

    // Characters listed in toRemove are dropped.
    assert(translate("hello world", charTable, "low") == "h5 rd");

    // Values may also be strings.
    string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringTable) == "h5llorange worangerld");
}
5303
// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charTable) == "h5ll7 w7rld");

    assert(translate("hello world", charTable, "low") == "h5 rd");

    immutable string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringTable) == "h5llorange worangerld");
}
5315
// Exercises the dchar[dchar] overload of translate for every combination of
// mutable, const and immutable char/wchar/dchar input arrays.
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        // Translation without a removal list, including a code point outside the BMP.
        assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
               to!S("qe55o wor5d"));
        assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
               to!S("he\U00010143\U00010143l wlr\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
               to!S("hell0 o w0rld"));
        // A null table leaves the input unchanged.
        assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world"));

        // Every removal-list character type (T) against every table constness (R).
        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar],
                        immutable dchar[dchar]))
            {{
                R tt = ['h' : 'q', 'l' : '5'];
                assert(translate(to!S("hello world"), tt, to!T("r"))
                    == to!S("qe55o wo5d"));
                assert(translate(to!S("hello world"), tt, to!T("helo"))
                    == to!S(" wrd"));
                // Removal is checked against the input, not the translated output.
                assert(translate(to!S("hello world"), tt, to!T("q5"))
                    == to!S("qe55o wor5d"));
            }}
        }();

        // The result has the same type as the input string.
        auto s = to!S("hello world");
        dchar[dchar] transTable = ['h' : 'q', 'l' : '5'];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
        assert(translate(s, transTable) == "qe55o wor5d");
    }();}
    });
}
5363
/++ Ditto +/
C1[] translate(C1, S, C2 = immutable char)(C1[] str,
                                           in S[dchar] transTable,
                                           const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated text in an appender and hand back its contents.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
5375
// Exercises the string-valued table overload of translate for every
// combination of mutable, const and immutable char/wchar/dchar input arrays.
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        // Replacement strings may be longer than one character ...
        assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o wor42d"));
        assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o \U00010143 wor42d"));
        assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
        // ... or empty, which removes the character.
        assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
               to!S("ello \U00010143 world"));
        assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
               to!S("hello  world"));
        // A null table leaves the input unchanged.
        assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world"));

        // Every removal-list character type (T) against every table constness (R).
        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(string[dchar], const string[dchar],
                        immutable string[dchar]))
            {{
                R tt = ['h' : "yellow", 'l' : "42"];
                assert(translate(to!S("hello world"), tt, to!T("r")) ==
                       to!S("yellowe4242o wo42d"));
                assert(translate(to!S("hello world"), tt, to!T("helo")) ==
                       to!S(" wrd"));
                // Removal is checked against the input, not the translated output.
                assert(translate(to!S("hello world"), tt, to!T("y42")) ==
                       to!S("yellowe4242o wor42d"));
                assert(translate(to!S("hello world"), tt, to!T("hello world")) ==
                       to!S(""));
                assert(translate(to!S("hello world"), tt, to!T("42")) ==
                       to!S("yellowe4242o wor42d"));
            }}
        }();

        // The result has the same type as the input string.
        auto s = to!S("hello world");
        string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
        assert(translate(s, transTable) == "sillyeputtyputtyo worputtyd");
    }();}
    });
}
5431
/++
    Overload of `translate` that writes the translated text to a
    caller-supplied output range instead of returning a new string.

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C1, C2 = immutable char, Buffer)(const(C1)[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove,
                                        Buffer buffer)
if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
{
    // All the work happens in the shared implementation.
    str.translateImpl(transTable, toRemove, buffer);
}
5450
///
@safe pure unittest
{
    import std.array : appender;

    dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    auto sink = appender!(dchar[])();

    // No characters removed.
    translate("hello world", charTable, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // The same buffer can be reused after clearing it.
    sink.clear();
    translate("hello world", charTable, "low", sink);
    assert(sink.data == "h5 rd");

    // String-valued table.
    sink.clear();
    string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringTable, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
5469
// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    import std.array : appender;

    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    auto sink = appender!(dchar[])();
    translate("hello world", charTable, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    sink.clear();
    translate("hello world", charTable, "low", sink);
    assert(sink.data == "h5 rd");

    sink.clear();
    immutable string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringTable, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
5488
/++ Ditto +/
void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
                                                   in S[dchar] transTable,
                                                   const(C2)[] toRemove,
                                                   Buffer buffer)
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
{
    // All the work happens in the shared implementation.
    str.translateImpl(transTable, toRemove, buffer);
}
5498
// Shared implementation of the associative-array based `translate`
// overloads: writes `str` to `buffer`, skipping every character found in
// `toRemove` and substituting every character that is a key of `transTable`.
private void translateImpl(C1, T, C2, Buffer)(const(C1)[] str,
                                      scope T transTable,
                                      const(C2)[] toRemove,
                                      Buffer buffer)
{
    // Set of code points that must not reach the output.
    bool[dchar] dropped;
    foreach (dchar ch; toRemove)
        dropped[ch] = true;

    foreach (dchar ch; str)
    {
        // Removal takes precedence over translation.
        if (ch in dropped)
            continue;

        if (auto replacement = ch in transTable)
            put(buffer, *replacement);
        else
            put(buffer, ch);
    }
}
5522
5523 /++
5524 This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
5525 will $(I not) work with Unicode. It exists as an optimization for the
5526 cases where Unicode processing is not necessary.
5527
5528 Unlike the other overloads of $(LREF _translate), this one does not take
5529 an AA. Rather, it takes a `string` generated by $(LREF makeTransTable).
5530
5531 The array generated by `makeTransTable` is `256` elements long such that
5532 the index is equal to the ASCII character being replaced and the value is
5533 equal to the character that it's being replaced with. Note that translate
5534 does not decode any of the characters, so you can actually pass it Extended
5535 ASCII characters if you want to (ASCII only actually uses `128`
5536 characters), but be warned that Extended ASCII characters are not valid
5537 Unicode and therefore will result in a `UTFException` being thrown from
5538 most other Phobos functions.
5539
5540 Also, because no decoding occurs, it is possible to use this overload to
5541 translate ASCII characters within a proper UTF-8 string without altering the
    other, non-ASCII characters. Only replacing a code unit greater than
    `127` with another code unit, or replacing any code unit with a code
    unit greater than `127`, will cause UTF validation issues.
5545
5546 See_Also:
5547 $(LREF tr),
5548 $(REF replace, std,array),
5549 $(REF substitute, std,algorithm,iteration)
5550
5551 Params:
5552 str = The original string.
5553 transTable = The string indicating which characters to replace and what
5554 to replace them with. It is generated by $(LREF makeTransTable).
5555 toRemove = The characters to remove from the string.
5556 +/
C[] translate(C = immutable char)(scope const(char)[] str, scope const(char)[] transTable,
              scope const(char)[] toRemove = null) @trusted pure nothrow
if (is(immutable C == immutable char))
in
{
    import std.conv : to;
    assert(transTable.length == 256, "transTable had invalid length of " ~
            to!string(transTable.length));
}
do
{
    // One flag per possible code unit; true means "drop it".
    bool[256] skip = false;
    foreach (char unit; toRemove)
        skip[unit] = true;

    // First pass: count the survivors so the result is allocated exactly once.
    size_t kept = 0;
    foreach (char unit; str)
    {
        if (skip[unit])
            continue;
        ++kept;
    }

    auto output = new char[kept];

    // Second pass: translate the survivors code unit by code unit
    // (no UTF decoding takes place).
    size_t pos = 0;
    foreach (char unit; str)
    {
        if (skip[unit])
            continue;
        output[pos] = transTable[unit];
        ++pos;
    }

    // `output` was allocated above and is not stored anywhere else in this
    // function, so it is handed out with the requested constness.
    return cast(C[]) output;
}
5591
///
@safe pure nothrow unittest
{
    string table = makeTrans("eo5", "57q");

    // Translation only.
    assert(translate("hello world", table) == "h5ll7 w7rld");

    // 'l', 'o' and 'w' are removed; what is left is translated.
    assert(translate("hello world", table, "low") == "h5 rd");
}
5600
/**
 * Same as $(LREF makeTransTable), except that the translation table is
 * returned as a `string` allocated on the GC heap.
 *
 * Use $(LREF makeTransTable) instead.
 */
string makeTrans(scope const(char)[] from, scope const(char)[] to) @trusted pure nothrow
{
    // Build the fixed-size table, then copy it into an immutable array.
    auto table = makeTransTable(from, to);
    return table[].idup;
}
5611
///
@safe pure nothrow unittest
{
    string table = makeTrans("eo5", "57q");

    // The returned string can be passed straight to translate.
    assert(translate("hello world", table) == "h5ll7 w7rld");

    // Optionally with a set of characters to remove.
    assert(translate("hello world", table, "low") == "h5 rd");
}
5620
/*******
 * Builds a 256 character translation table in which every character listed
 * in from[] maps to the character at the same position in to[], and every
 * other character maps to itself.
 *
 * Params:
 *      from = array of ASCII chars to replace, less than or equal to 256 in
 *             length and of the same length as `to`
 *      to = corresponding array of ASCII chars to translate to
 * Returns:
 *      translation array, indexed by the character being replaced
 */
char[256] makeTransTable(scope const(char)[] from, scope const(char)[] to) @safe pure nothrow @nogc
in
{
    import std.ascii : isASCII;
    assert(from.length == to.length, "from.length must match to.length");
    assert(from.length <= 256, "from.length must be <= 256");
    foreach (char c; from)
        assert(isASCII(c),
                "all characters in from must be valid ascii character");
    foreach (char c; to)
        assert(isASCII(c),
                "all characters in to must be valid ascii character");
}
do
{
    char[256] table = void;

    // Start from the identity mapping ...
    for (size_t code = 0; code < table.length; ++code)
        table[code] = cast(char) code;

    // ... then overwrite the requested entries; a character listed more
    // than once in `from` ends up with its last replacement.
    foreach (idx; 0 .. from.length)
        table[from[idx]] = to[idx];

    return table;
}
5654
///
@safe pure unittest
{
    // 'h' becomes 'q' and 'l' becomes '5'.
    auto letters = makeTransTable("hl", "q5");
    assert(translate("hello world", letters) == "qe55o wor5d");

    // None of the mapped characters occurs in the input.
    auto digits = makeTransTable("12345", "67890");
    assert(translate("hello world", digits) == "hello world");
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Explicitly requested result character type.
        static foreach (C; AliasSeq!(char, const char, immutable char))
        {{
            auto table = makeTransTable("hl", "q5");
            assert(translate!C("hello world", table) == to!(C[])("qe55o wor5d"));

            auto input = to!(C[])("hello world");
            static assert(is(typeof(input) == typeof(translate!C(input, table))));
            assert(translate(input, table) == "qe55o wor5d");
        }}

        // Every constness of the input string.
        static foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[]))
        {{
            auto hl = makeTransTable("hl", "q5");
            auto digits = makeTransTable("12345", "67890");

            assert(translate(to!S("hello world"), hl) == to!S("qe55o wor5d"));
            // Non-ASCII code units pass through untouched.
            assert(translate(to!S("hello \U00010143 world"), hl) ==
                   to!S("qe55o \U00010143 wor5d"));
            assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod"));
            assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world"));
            assert(translate(to!S("hello world"), digits) == to!S("hello world"));
            assert(translate(to!S("hello \U00010143 world"), digits) ==
                   to!S("hello \U00010143 world"));

            // Every constness of the removal set.
            static foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[]))
            {
                assert(translate(to!S("hello world"), hl, to!T("r")) ==
                       to!S("qe55o wo5d"));
                assert(translate(to!S("hello \U00010143 world"), hl, to!T("r")) ==
                       to!S("qe55o \U00010143 wo5d"));
                assert(translate(to!S("hello world"), hl, to!T("helo")) ==
                       to!S(" wrd"));
                assert(translate(to!S("hello world"), hl, to!T("q5")) ==
                       to!S("qe55o wor5d"));
            }
        }}
    });
}
5704
5705 /++
5706 This is an $(I $(RED ASCII-only)) overload of `translate` which takes an existing buffer to write the contents to.
5707
5708 Params:
5709 str = The original string.
5710 transTable = The string indicating which characters to replace and what
5711 to replace them with. It is generated by $(LREF makeTransTable).
5712 toRemove = The characters to remove from the string.
5713 buffer = An output range to write the contents to.
5714 +/
void translate(C = immutable char, Buffer)(scope const(char)[] str, scope const(char)[] transTable,
        scope const(char)[] toRemove, Buffer buffer) @trusted pure
if (is(immutable C == immutable char) && isOutputRange!(Buffer, char))
in
{
    assert(transTable.length == 256, format!
            "transTable.length %s must equal 256"(transTable.length));
}
do
{
    // NOTE(review): the whole body is @trusted, which also covers the call
    // into the caller-supplied output range below - confirm that is intended.

    // One flag per possible code unit; true means "drop it".
    bool[256] skip = false;
    foreach (char unit; toRemove)
        skip[unit] = true;

    // Translate code unit by code unit (no UTF decoding) straight into `buffer`.
    foreach (char unit; str)
    {
        if (skip[unit])
            continue;
        put(buffer, transTable[unit]);
    }
}
5736
///
@safe pure unittest
{
    import std.array : appender;

    auto table = makeTransTable("eo5", "57q");
    auto sink = appender!(char[])();

    // Translation only.
    translate("hello world", table, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Reuse the buffer; this time 'l', 'o' and 'w' are removed as well.
    sink.clear();
    translate("hello world", table, "low", sink);
    assert(sink.data == "h5 rd");
}
5750
/**********************************************
 * Return string that is the 'successor' to s[].
 *
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. `'9'`, `'z'` and `'Z'` wrap around to `'0'`, `'a'`
 * and `'A'` and generate a carry, in which case the process is repeated
 * with the character to its immediate left. A carry out of the leftmost
 * character prepends a new character (`'1'`, `'a'` or `'A'`); a carry that
 * reaches a character which is not a-zA-Z0-9 is dropped.
 *
 * A string that is empty or does not end in a-zA-Z0-9 is returned unchanged.
 */
S succ(S)(S s) @safe pure
if (isSomeString!S)
{
    import std.ascii : isAlphaNum;

    // Nothing to increment.
    if (s.length == 0 || !isAlphaNum(s[$ - 1]))
        return s;

    auto work = s.dup;
    size_t pos = work.length - 1;

    for (;;)
    {
        dchar cur = s[pos];
        dchar carry;

        if (cur == '9')
        {
            cur = '0';
            carry = '1';
        }
        else if (cur == 'z' || cur == 'Z')
        {
            // Wrap to the first letter of the same case.
            cur -= 'Z' - 'A';
            carry = cur;
        }
        else
        {
            // No wrap-around at this position: increment it (if it is
            // alphanumeric at all) and stop.
            if (isAlphaNum(cur))
                work[pos]++;
            return work;
        }

        work[pos] = cast(char) cur;
        if (pos == 0)
        {
            // Carry out of the leftmost character: grow by one in front.
            auto grown = new typeof(work[0])[work.length + 1];
            grown[0] = cast(char) carry;
            grown[1 .. $] = work[];
            return grown;
        }
        pos--;
    }
}
5804
///
@safe pure unittest
{
    // Plain increment.
    assert(succ("1") == "2");

    // A carry out of the leftmost digit grows the string.
    assert(succ("9") == "10");
    assert(succ("999") == "1000");

    // The carry crosses from the digits into the letters.
    assert(succ("zz99") == "aaa00");
}

@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        assert(succ(string.init) is null);

        // input / expected successor
        string[2][] cases = [
            ["!@#$%", "!@#$%"],
            ["1", "2"],
            ["9", "10"],
            ["999", "1000"],
            ["zz99", "aaa00"],
        ];
        foreach (pair; cases)
            assert(succ(pair[0]) == pair[1]);
    });
}
5829
5830
/++
    Replaces the characters in `str` which are in `from` with
    the corresponding characters in `to` and returns the resulting string.

    `tr` is based on
    $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
    though it doesn't do everything that the Posix utility does.

    Params:
        str       = The original string.
        from      = The characters to replace.
        to        = The characters to replace with.
        modifiers = String containing modifiers.

    Returns:
        A newly built string with the replacements applied.

    Modifiers:
        $(BOOKTABLE,
        $(TR $(TD Modifier) $(TD Description))
        $(TR $(TD `'c'`) $(TD Complement the list of characters in `from`))
        $(TR $(TD `'d'`) $(TD Removes matching characters with no corresponding
                              replacement in `to`))
        $(TR $(TD `'s'`) $(TD Removes adjacent duplicates in the replaced
                              characters))
        )

    If the modifier `'d'` is present, then the number of characters in
    `to` may be only `0` or `1`.

    If the modifier `'d'` is $(I not) present, and `to` is empty, then
    `to` is taken to be the same as `from`.

    If the modifier `'d'` is $(I not) present, and `to` is shorter than
    `from`, then `to` is extended by replicating the last character in
    `to`.

    Both `from` and `to` may contain ranges using the `'-'` character
    (e.g. `"a-d"` is synonymous with `"abcd"`.) Neither accept a leading
    `'^'` as meaning the complement of the string (use the `'c'` modifier
    for that).

    See_Also:
        $(LREF translate),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)
  +/
C1[] tr(C1, C2, C3, C4 = immutable char)
       (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
{
    import std.array : appender;
    import std.conv : conv_to = to;
    import std.utf : decode;

    bool mod_c;
    bool mod_d;
    bool mod_s;

    foreach (char c; modifiers)
    {
        switch (c)
        {
        case 'c':   mod_c = 1; break;   // complement
        case 'd':   mod_d = 1; break;   // delete unreplaced chars
        case 's':   mod_s = 1; break;   // squeeze duplicated replaced chars
        default:    assert(false, "modifier must be one of ['c', 'd', 's'] not "
                        ~ c);
        }
    }

    // Without 'd', an empty `to` means "same as `from`"
    if (to.empty && !mod_d)
        to = conv_to!(typeof(to))(from);

    auto result = appender!(C1[])();
    bool modified;      // was the previously emitted character a replacement?
    dchar lastc;        // previously emitted character (for 's')

    foreach (dchar c; str)
    {
        dchar lastf;
        dchar lastt;
        dchar newc;
        int n = 0;      // position of c within the expanded from[]

        // Look c up in from[], expanding "x-y" ranges on the fly
        for (size_t i = 0; i < from.length; )
        {
            immutable f = decode(from, i);
            if (f == '-' && lastf != dchar.init && i < from.length)
            {
                immutable nextf = decode(from, i);
                if (lastf <= c && c <= nextf)
                {
                    n += c - lastf - 1;
                    if (mod_c)
                        goto Lnotfound;
                    goto Lfound;
                }
                n += nextf - lastf;
                lastf = lastf.init;
                continue;
            }

            if (c == f)
            {   if (mod_c)
                    goto Lnotfound;
                goto Lfound;
            }
            lastf = f;
            n++;
        }
        if (!mod_c)
            goto Lnotfound;
        n = 0;          // consider it 'found' at position 0

      Lfound:

        // Find the nth character in to[]
        dchar nextt;
        for (size_t i = 0; i < to.length; )
        {
            immutable t = decode(to, i);
            if (t == '-' && lastt != dchar.init && i < to.length)
            {
                nextt = decode(to, i);
                n -= nextt - lastt;
                if (n < 0)
                {
                    newc = nextt + n + 1;
                    goto Lnewc;
                }
                lastt = dchar.init;
                continue;
            }
            if (n == 0)
            {   newc = t;
                goto Lnewc;
            }
            lastt = t;
            nextt = t;
            n--;
        }
        // to[] is exhausted: with 'd' the character is deleted, otherwise
        // the last character of to[] is replicated
        if (mod_d)
            continue;
        newc = nextt;

      Lnewc:
        if (mod_s && modified && newc == lastc)
            continue;
        // Validate the replacement before it is appended, so that an
        // invalid character never reaches the result.
        assert(newc != dchar.init, "character must not be dchar.init");
        result.put(newc);
        modified = true;
        lastc = newc;
        continue;

      Lnotfound:
        result.put(c);
        lastc = c;
        modified = false;
    }

    return result.data;
}
5990
///
@safe pure unittest
{
    // Plain one-to-one replacement.
    assert(tr("abcdef", "cd", "CD") == "abCDef");

    // 's' squeezes adjacent duplicates among the replaced characters.
    assert(tr("1st March, 2018", "March", "MAR", "s") == "1st MAR, 2018");

    // 'd' deletes the matched characters that have no replacement.
    assert(tr("abcdef", "ef", "", "d") == "abcd");

    // 'c' complements `from`: everything that is not a letter is replaced.
    assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul ");
}

@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Complete list of test types; too slow to test'em all
    // alias TestTypes = AliasSeq!(
    //          char[], const( char)[], immutable( char)[],
    //         wchar[], const(wchar)[], immutable(wchar)[],
    //         dchar[], const(dchar)[], immutable(dchar)[]);

    // Reduced list of test types
    alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]);

    assertCTFEable!(
    {
        foreach (Src; TestTypes)
        {
            foreach (From; TestTypes)
            {
                foreach (To; TestTypes)
                {
                    // plain and ranged replacement
                    assert(equal(tr(to!Src("abcdef"), to!From("cd"), to!To("CD")), "abCDef"));
                    assert(equal(tr(to!Src("abcdef"), to!From("b-d"), to!To("B-D")), "aBCDef"));
                    assert(equal(tr(to!Src("abcdefgh"), to!From("b-dh"), to!To("B-Dx")), "aBCDefgx"));
                    assert(equal(tr(to!Src("abcdefgh"), to!From("b-dh"), to!To("B-CDx")), "aBCDefgx"));
                    assert(equal(tr(to!Src("abcdefgh"), to!From("b-dh"), to!To("B-BCDx")), "aBCDefgx"));
                    // modifiers
                    assert(equal(tr(to!Src("abcdef"), to!From("ef"), to!To("*"), to!Src("c")), "****ef"));
                    assert(equal(tr(to!Src("abcdef"), to!From("ef"), to!To(""), to!From("d")), "abcd"));
                    assert(equal(tr(to!Src("hello goodbye"), to!From("lo"), to!To(""), to!To("s")), "helo godbye"));
                    assert(equal(tr(to!Src("hello goodbye"), to!From("lo"), to!To("x"), "s"), "hex gxdbye"));
                    assert(equal(tr(to!Src("14-Jul-87"), to!From("a-zA-Z"), to!To(" "), "cs"), " Jul "));
                    // duplicates in `from`: the first occurrence wins
                    assert(equal(tr(to!Src("Abc"), to!From("AAA"), to!To("XYZ")), "Xbc"));
                }
            }

            // The result has the same type as the input string.
            auto input = to!Src("hello world");
            static assert(is(typeof(input) == typeof(tr(input, "he", "if"))));
            assert(tr(input, "he", "if") == "ifllo world");
        }
    });
}

@system pure unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;

    // An unknown modifier trips the assertion in tr.
    assertThrown!AssertError(tr("abcdef", "cd", "CD", "X"));
}
6050
/**
 * Takes a string `s` and determines if it represents a number. This function
 * also takes an optional parameter, `bAllowSep`, which will accept the
 * separator characters `','` and `'__'` within the string. But these
 * characters should be stripped from the string before using any
 * of the conversion functions like `to!int()`, `to!float()`, etc.,
 * otherwise an error will occur.
 *
 * Also please note, that no spaces are allowed within the string
 * anywhere whether it's a leading, trailing, or embedded space(s),
 * thus they too must be stripped from the string before using this
 * function, or any of the conversion functions.
 *
 * An exponent must come after at least one digit, must carry an explicit
 * sign (`e+` or `e-`) and must itself be followed by at least one digit.
 *
 * Params:
 *     s = the string or random access range to check
 *     bAllowSep = accept separator characters or not
 *
 * Returns:
 *     `true` if `s` represents a number, `false` otherwise.
 */
bool isNumeric(S)(S s, bool bAllowSep = false)
if (isSomeString!S ||
    (isRandomAccessRange!S &&
    hasSlicing!S &&
    isSomeChar!(ElementType!S) &&
    !isInfinite!S))
{
    import std.algorithm.comparison : among;
    import std.ascii : isASCII;

    // ASCII only case insensitive comparison with two ranges
    static bool asciiCmp(S1)(S1 a, string b)
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : map;
        import std.ascii : toLower;
        import std.utf : byChar;
        return a.map!toLower.equal(b.byChar.map!toLower);
    }

    // auto-decoding special case, we're only comparing characters
    // in the ASCII range so there's no reason to decode
    static if (isSomeString!S)
    {
        import std.utf : byCodeUnit;
        auto codeUnits = s.byCodeUnit;
    }
    else
    {
        alias codeUnits = s;
    }

    if (codeUnits.empty)
        return false;

    // Check for NaN (Not a Number) and for Infinity
    if (codeUnits.among!((a, b) => asciiCmp(a.save, b))
            ("nan", "nani", "nan+nani", "inf", "-inf"))
        return true;

    // A single leading sign is allowed and skipped
    immutable frontResult = codeUnits.front;
    if (frontResult == '-' || frontResult == '+')
        codeUnits.popFront;

    immutable iLen = codeUnits.length;
    bool bDecimalPoint, bExponent, bComplex, sawDigits;

    for (size_t i = 0; i < iLen; i++)
    {
        immutable c = codeUnits[i];

        if (!c.isASCII)
            return false;

        // Digits are good, skip to the next character
        if (c >= '0' && c <= '9')
        {
            sawDigits = true;
            continue;
        }

        // Check for the complex type, and if found
        // reset the flags for checking the 2nd number.
        if (c == '+')
        {
            if (!i)
                return false;
            bDecimalPoint = false;
            bExponent = false;
            bComplex = true;
            sawDigits = false;
            continue;
        }

        // Allow only one exponent per number
        if (c == 'e' || c == 'E')
        {
            // A 2nd exponent, an exponent with no mantissa digit in
            // front of it, or nothing after the 'e': not a number
            if (bExponent || !sawDigits || i + 1 >= iLen)
                return false;
            // Look forward for the sign, and if
            // missing then this is not a number.
            if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+')
                return false;
            bExponent = true;
            // The exponent needs digits of its own; without this reset
            // inputs such as "1e-" or "1e-f" would be accepted.
            sawDigits = false;
            i++;    // skip the sign
            continue;
        }
        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;
            bDecimalPoint = true;
            continue;
        }
        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        if (i == iLen - 2)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Floating-Point Number
            if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") &&
                    (bDecimalPoint || bExponent || bComplex))
                return true;
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (bDecimalPoint || bExponent || bComplex))
                return false;
            // Could be a Integer or a Float, thus
            // all these suffixes are valid for both
            return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))
                ("ul", "fi", "li") != 0;
        }
        if (i == iLen - 1)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (c.among!('u', 'l', 'U', 'L')() &&
                   (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Check to see if the last character in the string
            // is the required 'i' character
            if (bComplex)
                return c.among!('i', 'I')() != 0;
            // Floating-Point Number
            return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0;
        }
        // Check if separators are allowed to be in the numeric string
        if (!bAllowSep || !c.among!('_', ',')())
            return false;
    }

    return sawDigits;
}
6212
/**
 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong)
 * ['+'|'-']digit(s)[U|L|UL]
 */
@safe @nogc pure nothrow unittest
{
    // without a suffix
    assert(isNumeric("123"));

    // with a suffix
    assert(isNumeric("123L"));
    assert(isNumeric("123UL"));

    // with a sign and a suffix
    assert(isNumeric("+123U"));
    assert(isNumeric("-123L"));
}

/**
 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    // sign and fraction
    assert(isNumeric("+123"));
    assert(isNumeric("-123.01"));

    // exponent with the various suffixes
    assert(isNumeric("123.3e-10f"));
    assert(isNumeric("123.3e-10fi"));
    assert(isNumeric("123.3e-10L"));

    // special values
    assert(isNumeric("nan"));
    assert(isNumeric("nani"));
    assert(isNumeric("-inf"));
}

/**
 * Floating-Point Number: (cfloat, cdouble, and creal)
 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
 *         [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
 *      or [nan|nani|nan+nani|inf|-inf]
 */
@safe @nogc pure nothrow unittest
{
    // real part '+' imaginary part
    assert(isNumeric("-123e-1+456.9e-10Li"));
    assert(isNumeric("+123e+10+456i"));
    assert(isNumeric("123+456"));
}

@safe @nogc pure nothrow unittest
{
    // suffixes without any digit
    assert(!isNumeric("F"));
    assert(!isNumeric("L"));
    assert(!isNumeric("U"));
    assert(!isNumeric("i"));
    assert(!isNumeric("fi"));
    assert(!isNumeric("ul"));
    assert(!isNumeric("li"));

    // punctuation and signs on their own
    assert(!isNumeric("."));
    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
    assert(!isNumeric("++1"));
    assert(!isNumeric(""));

    // malformed exponents
    assert(!isNumeric("e-"));
    assert(!isNumeric("e+"));
    assert(!isNumeric(".f"));
    assert(!isNumeric("e+f"));
    assert(!isNumeric("1E+1E+1"));
    assert(!isNumeric("1E1"));

    // non-ASCII code unit
    assert(!isNumeric("\x81"));
}
6279
// Test string types
@safe unittest
{
    import std.conv : to;

    static foreach (Str; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[]))
    {
        // accepted
        assert(isNumeric("123".to!Str));
        assert(isNumeric("123UL".to!Str));
        assert(isNumeric("123fi".to!Str));
        assert(isNumeric("123li".to!Str));
        // rejected: two leading signs
        assert(!isNumeric("--123L".to!Str));
    }
}

// test ranges
@system pure unittest
{
    import std.range : refRange;
    import std.utf : byCodeUnit;

    // random access ranges of code units
    assert(isNumeric("123".byCodeUnit));
    assert(isNumeric("123UL".byCodeUnit));
    assert(isNumeric("123fi".byCodeUnit));
    assert(isNumeric("123li".byCodeUnit));
    assert(!isNumeric("--123L".byCodeUnit));

    // ranges with reference semantics
    dstring zero = "0";
    assert(isNumeric(refRange(&zero)));

    dstring special = "nani";
    assert(isNumeric(refRange(&special)));
}

/// isNumeric works with CTFE
@safe pure unittest
{
    enum accepted = isNumeric("123.00E-5+1234.45E-12Li");
    enum rejected = isNumeric("12345xxxx890");

    static assert( accepted);
    static assert(!rejected);
}
6323
@system unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Test the isNumeric(in string) function
        foreach (numeric; [
                "1", "1.0", "1e-1", "567L", "23UL",
                "1233E-1+1.0e-1i",
                "123.00E-5+1234.45E-12Li",
                "123fi", "123li", "123f"])
        {
            assert(isNumeric(numeric) == true);
        }

        foreach (notNumeric; [
                "12345xxxx890", "-123..56f", "12.3.5.6",
                " 12.356", "123 5.6",
                "123.00e-5+1234.45E-12iL",
                "123.00e-5+1234.45e-12uL",
                "123.00E-5+1234.45e-12lu",
                "--123L", "+123.5UL", "123.u"])
        {
            assert(isNumeric(notNumeric) == false);
        }

        // @@@BUG@@ to!string(float) is not CTFEable.
        // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
        if (!__ctfe)
        {
            assert(isNumeric(to!string(real.nan)) == true);
            assert(isNumeric(to!string(-real.infinity)) == true);
        }

        // Only the slice without the currency sign and the trailing
        // characters is numeric.
        string price = "$250.99-";
        assert(isNumeric(price[1 .. price.length - 2]) == true);
        assert(isNumeric(price) == false);
        assert(isNumeric(price[0 .. price.length - 1]) == false);
    });

    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
}
6373
/*****************************
 * Soundex algorithm.
 *
 * Encodes a word as 4 characters that describe how it sounds
 * phonetically. Spellings that sound alike are meant to receive
 * the same Soundex value, which makes Soundex usable for fuzzy
 * matching of names.
 *
 * Params:
 *  str = String or InputRange to convert to Soundex representation.
 *
 * Returns:
 *  The four character array with the Soundex result in it.
 *  The array is filled with zeros if `str` contains no ASCII letter and
 *  therefore has no Soundex representation.
 *
 * See_Also:
 *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
 *  $(LUCKY The Soundex Indexing System)
 *  $(LREF soundex)
 *
 * Note:
 *  Only works well with English names.
 */
char[4] soundexer(Range)(Range str)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    // Soundex digit of each letter, indexed by (letter - 'A')
    static immutable dex =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
        "01230120022455012623010202";

    char[4] code = void;
    size_t filled = 0;      // number of slots of `code` written so far
    C prev;                 // digit of the previous letter, or C.init
    foreach (C ch; str)
    {
        immutable isLower = ch >= 'a' && ch <= 'z';
        immutable isUpper = ch >= 'A' && ch <= 'Z';

        // Anything but an ASCII letter is skipped and separates letters
        if (!isLower && !isUpper)
        {
            prev = prev.init;
            continue;
        }
        if (isLower)
            ch -= 'a' - 'A';

        // The first letter is stored as is
        if (filled == 0)
        {
            code[0] = cast(char) ch;
            filled++;
            prev = dex[ch - 'A'];
            continue;
        }

        // 'H' and 'W' do not separate letters with the same digit ...
        if (ch == 'H' || ch == 'W')
            continue;
        // ... but vowels do
        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U')
            prev = prev.init;

        ch = dex[ch - 'A'];
        if (ch != '0' && ch != prev)
        {
            code[filled] = cast(char) ch;
            filled++;
            prev = ch;
        }
        if (filled == 4)
            return code;
    }

    if (filled == 0)
        code[] = 0;             // no letter at all
    else
        code[filled .. 4] = '0';    // pad with '0' digits
    return code;
}
6453
/// ditto
char[4] soundexer(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Forward to the overload above, instantiated with the string type
    // that `Range` converts to.
    alias Str = StringTypeOf!Range;
    return soundexer!Str(str);
}
6460
///
@safe unittest
{
    // Names that sound alike share a code.
    assert(soundexer("Gauss") == "G200");
    assert(soundexer("Ghosh") == "G200");

    assert(soundexer("Robert") == "R163");
    assert(soundexer("Rupert") == "R163");

    // No letters, hence no Soundex representation: all zeros.
    assert(soundexer("0123^&^^**&^") == ['\0', '\0', '\0', '\0']);
}
6472
/*****************************
 * Like $(LREF soundexer), but with different parameters
 * and return value.
 *
 * Params:
 *  str = String to convert to Soundex representation.
 *  buffer = Optional char array of at least 4 elements to put the
 *          resulting Soundex characters into; only its first 4
 *          elements are written. If null, the return value
 *          buffer will be allocated on the heap.
 * Returns:
 *  The four character array with the Soundex result in it: the first
 *  4 elements of `buffer` if one was passed, a new array otherwise.
 *  Returns null if there is no Soundex representation for the string.
 * See_Also:
 *  $(LREF soundexer)
 */
char[] soundex(scope const(char)[] str, return scope char[] buffer = null)
    @safe pure nothrow
in
{
    assert(buffer is null || buffer.length >= 4);
}
out (result)
{
    if (result !is null)
    {
        assert(result.length == 4, "Result must have length of 4");
        assert(result[0] >= 'A' && result[0] <= 'Z', "The first character of "
                ~ " the result must be an upper character not " ~ result);
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6', "the last three character of the"
                ~ " result must be number between 0 and 6 not " ~ result);
    }
}
do
{
    char[4] result = soundexer(str);
    // soundexer signals "no representation" with an all-zero array
    if (result[0] == 0)
        return null;
    if (buffer is null)
        buffer = new char[4];
    // The in-contract admits buffers longer than 4 chars, while a whole-array
    // copy requires matching lengths and the out-contract requires a result
    // of exactly 4 chars: copy into, and return, the first 4 elements only.
    buffer[0 .. 4] = result[];
    return buffer[0 .. 4];
}
6516
6517 ///
6518 @safe unittest
6519 {
6520 assert(soundex("Gauss") == "G200");
6521 assert(soundex("Ghosh") == "G200");
6522
6523 assert(soundex("Robert") == "R163");
6524 assert(soundex("Rupert") == "R163");
6525
6526 assert(soundex("0123^&^^**&^") == null);
6527 }
6528
@safe pure nothrow unittest
{
    import std.exception : assertCTFEable;
    import std.utf : byChar, byDchar, byWchar;

    // The same checks are executed at run time and during CTFE.
    assertCTFEable!(
    {
        char[4] buffer;

        // Inputs for which soundex yields null.
        assert(soundex(null) == null);
        assert(soundex("") == null);
        assert(soundex("0123^&^^**&^") == null);

        // Calls that pass an explicit result buffer.
        assert(soundex("Kant", buffer) == "K530");
        assert(soundex("Lukasiewicz", buffer) == "L222");

        // name -> expected code, using the default buffer argument.
        static immutable string[2][] expectations = [
            ["Euler", "E460"],
            [" Ellery ", "E460"],
            ["Gauss", "G200"],
            ["Ghosh", "G200"],
            ["Hilbert", "H416"],
            ["Heilbronn", "H416"],
            ["Knuth", "K530"],
            ["Lloyd", "L300"],
            ["Ladd", "L300"],
            ["Lissajous", "L222"],
            ["Robert", "R163"],
            ["Rupert", "R163"],
            ["Rubin", "R150"],
            ["Washington", "W252"],
            ["Lee", "L000"],
            ["Gutierrez", "G362"],
            ["Pfister", "P236"],
            ["Jackson", "J250"],
            ["Tymczak", "T522"],
            ["Ashcraft", "A261"],
            ["Woo", "W000"],
            ["Pilgrim", "P426"],
            ["Flingjingwaller", "F452"],
            ["PEARSE", "P620"],
            ["PIERCE", "P620"],
            ["Price", "P620"],
            ["CATHY", "C300"],
            ["KATHY", "K300"],
            ["Jones", "J520"],
            ["johnsons", "J525"],
            ["Hardin", "H635"],
            ["Martinez", "M635"],
        ];
        foreach (pair; expectations)
            assert(soundex(pair[0]) == pair[1]);

        // The range-based entry point, fed every code-unit width.
        assert(soundexer("Martinez".byChar ) == "M635");
        assert(soundexer("Martinez".byWchar) == "M635");
        assert(soundexer("Martinez".byDchar) == "M635");
    });
}
6581
@safe pure unittest
{
    // testAliasedString is a helper defined elsewhere in this module;
    // presumably it exercises soundexer with string-convertible types (confirm there).
    immutable accepted = testAliasedString!soundexer("Martinez");
    assert(accepted);
}
6586
6587
/***************************************************
 * Build a table that maps every unambiguous abbreviation to the
 * string in `values` it stands for.
 *
 * A leading part of a string becomes a key when neither the next nor the
 * previous distinct string (in sorted order) starts with the same characters.
 * Every string is also a key for itself. Duplicate entries in `values` are
 * treated as one. When there is only one distinct string, the empty
 * abbreviation maps to it as well.
 *
 * This is useful in cases where the user is expected to type
 * in one of a known set of strings, and the program will helpfully
 * auto-complete the string once sufficient characters have been
 * entered that uniquely identify it.
 *
 * Params:
 *     values = the full strings; the array passed in is not reordered,
 *              a sorted copy is used internally
 *
 * Returns:
 *     associative array from abbreviation to full string
 */
string[string] abbrev(string[] values) @safe pure
{
    import std.algorithm.sorting : sort;
    import std.utf : stride;

    string[string] table;

    // Sort a private copy so the caller's array keeps its order.
    auto sorted = values.dup;
    sort(sorted);

    immutable count = sorted.length;

    bool hasPrev = false;   // becomes true once one distinct string was handled
    string prev;            // the previously handled distinct string
    string next;            // last entry looked at while skipping duplicates

    size_t idx = 0;
    while (idx < count)
    {
        string current = sorted[idx];

        // Step over all entries equal to `current`; `next` ends up holding
        // the first different entry, if there is one.
        size_t following = idx + 1;
        while (following < count)
        {
            next = sorted[following];
            if (current != next)
                break;
            ++following;
        }
        immutable hasNext = following != count;

        // Try every proper prefix, advancing one code point at a time.
        for (size_t len = 0; len < current.length; len += stride(current, len))
        {
            string prefix = current[0 .. len];

            immutable clashesNext = hasNext && len <= next.length && prefix == next[0 .. len];
            immutable clashesPrev = hasPrev && len <= prev.length && prefix == prev[0 .. len];

            if (!clashesNext && !clashesPrev)
                table[prefix] = current;
        }
        table[current] = current;

        hasPrev = true;
        prev = current;
        idx = following;
    }

    return table;
}
6645
///
@safe unittest
{
    import std.string;

    static string[] words = [ "food", "foxy" ];

    // "f" and "fo" are shared by both words, so they are not abbreviations.
    string[string] expected = ["fox": "foxy", "food": "food",
                               "foxy": "foxy", "foo": "food"];
    assert(abbrev(words) == expected);
}
6656
6657
@system pure unittest
{
    import std.algorithm.sorting : sort;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Unsorted input that contains a duplicate.
        string[] input = ["hello", "hello", "he"];

        string[string] table = abbrev(input);

        auto sortedKeys = table.keys.dup;
        sort(sortedKeys);

        static immutable string[] expectedKeys   = ["he", "hel", "hell", "hello"];
        static immutable string[] expectedValues = ["he", "hello", "hello", "hello"];

        assert(sortedKeys.length == 4);
        foreach (n, key; sortedKeys)
        {
            assert(key == expectedKeys[n]);
            assert(table[key] == expectedValues[n]);
        }
    });
}
6689
6690
/******************************************
 * Compute column number at the end of the printed form of the string,
 * assuming the string starts in the leftmost column, which is numbered
 * starting from 0.
 *
 * Tab characters are expanded into enough spaces to bring the column number
 * to the next multiple of tabsize. A tabsize of 0 makes tab characters
 * zero-width (the column number is left unchanged).
 * If there are multiple lines in the string, the column number of the last
 * line is returned.
 *
 * Params:
 *    str = string or InputRange to be analyzed
 *    tabsize = number of columns a tab character represents
 *
 * Returns:
 *    column number
 */

size_t column(Range)(Range str, in size_t tabsize = 8)
if ((isInputRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    static if (is(immutable ElementEncodingType!Range == immutable char))
    {
        // char input is decoded first, then handled by the branch below
        import std.utf : byDchar;

        return str.byDchar.column(tabsize);
    }
    else
    {
        // wchar and dchar elements are inspected one by one without decoding
        import std.uni : lineSep, paraSep, nelSep;

        size_t col;

        foreach (const c; str)
        {
            switch (c)
            {
                case '\t':
                    // Guard against tabsize == 0, which would otherwise
                    // divide by zero; a zero-width tab moves nothing.
                    if (tabsize != 0)
                        col = (col + tabsize) / tabsize * tabsize;
                    break;

                // Every kind of line break restarts the count.
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    col = 0;
                    break;

                default:
                    col++;
                    break;
            }
        }
        return col;
    }
}
6752
///
@safe pure unittest
{
    import std.utf : byChar, byWchar, byDchar;

    // Strings of every character width are accepted ...
    assert("1234 ".column() == 5);
    assert("1234 "w.column() == 5);
    assert("1234 "d.column() == 5);

    // ... and so are ranges of code units.
    assert("1234 ".byChar().column() == 5);
    assert("1234 "w.byWchar().column() == 5);
    assert("1234 "d.byDchar().column() == 5);

    // Tab stops are set at 8 spaces by default; tab characters insert enough
    // spaces to bring the column position to the next multiple of 8.
    assert("\t".column() == 8);
    assert("1\t".column() == 8);
    assert("\t1".column() == 9);
    assert("123\t".column() == 8);

    // Other tab widths are possible by specifying it explicitly:
    assert("\t".column(4) == 4);
    assert("1\t".column(4) == 4);
    assert("\t1".column(4) == 5);
    assert("123\t".column(4) == 4);

    // New lines reset the column number.
    assert("abc\n".column() == 0);
    assert("abc\n1".column() == 1);
    assert("abcdefg\r1234".column() == 4);
    assert("abc\u20281".column() == 1);
    assert("abc\u20291".column() == 1);
    assert("abc\u00851".column() == 1);
    assert("abc\u00861".column() == 5);
}
6788
size_t column(Range)(auto ref Range str, in size_t tabsize = 8)
if (isConvertibleToString!Range)
{
    // Forward to the overload that is instantiated with the string type
    // Range converts to.
    alias Str = StringTypeOf!Range;
    return column!Str(str, tabsize);
}
6794
@safe pure unittest
{
    // testAliasedString is a helper defined elsewhere in this module;
    // presumably it exercises column with string-convertible types (confirm there).
    immutable accepted = testAliasedString!column("abc\u00861");
    assert(accepted);
}
6799
@safe @nogc unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Checked at run time (under @nogc) and during CTFE.
    assertCTFEable!(
    {
        // Null and empty strings end in column 0.
        assert(string.init.column() == 0);
        assert("".column() == 0);

        // Default tab stops every 8 columns.
        assert("\t".column() == 8);
        assert("abc\t".column() == 8);
        assert("12345678\t".column() == 16);
    });
}
6814
/******************************************
 * Wrap text into a paragraph.
 *
 * The input text string s is formed into a paragraph
 * by breaking it up into a sequence of lines, delineated
 * by \n, such that the number of columns is not exceeded
 * on each line. Runs of whitespace between words are replaced by a
 * single space or a line break. Lines are never broken inside a word, so a
 * word wider than columns still exceeds the limit.
 * Word widths are counted in code points.
 * The last line is terminated with a \n.
 *
 * If the text consists of one word only and that word does not fit
 * after firstindent, it is placed on a line of its own, i.e. the result
 * starts with firstindent, \n and indent.
 *
 * Params:
 *     s = text string to be wrapped
 *     columns = maximum number of columns in the paragraph
 *     firstindent = string used to indent first line of the paragraph
 *     indent = string to use to indent following lines of the paragraph
 *     tabsize = column spacing of tabs in firstindent[] and indent[]
 * Returns:
 *     resulting paragraph as an allocated string
 */

S wrap(S)(S s, in size_t columns = 80, S firstindent = null,
        S indent = null, in size_t tabsize = 8)
if (isSomeString!S)
{
    import std.uni : isWhite;

    typeof(s.dup) result;
    bool inword;
    bool first = true;  // true until the first word has been written
    size_t wordstart;   // code-unit index at which the current word starts
    size_t wordlen;     // width of the current word in code points

    const indentcol = column(indent, tabsize);

    // Grow to the expected size, then shrink back to the indent prefix
    // (presumably meant as a capacity hint for the appends below).
    result.length = firstindent.length + s.length;
    result.length = firstindent.length;
    result[] = firstindent[];
    auto col = column(firstindent, tabsize);

    foreach (size_t i, dchar c; s)
    {
        if (!isWhite(c))
        {
            if (!inword)
            {
                wordstart = i;
                wordlen = 0;
                inword = true;
            }
            ++wordlen;
            continue;
        }

        if (!inword)
            continue;   // still inside a run of whitespace

        // Whitespace ends the current word: decide how to join it on.
        if (first)
        {
            // The first word directly follows firstindent.
        }
        else if (col + 1 + wordlen > columns)
        {
            result ~= '\n';
            result ~= indent;
            col = indentcol;
        }
        else
        {
            result ~= ' ';
            col += 1;
        }
        result ~= s[wordstart .. i];
        col += wordlen;
        inword = false;
        first = false;
    }

    if (inword)
    {
        // Trailing word that was not followed by whitespace. It is judged by
        // the same "does not exceed columns" rule as the words above, and no
        // separator is counted when it is the very first word.
        const separator = first ? 0 : 1;
        if (col + separator + wordlen > columns)
        {
            result ~= '\n';
            result ~= indent;
        }
        else if (!first)
            result ~= ' ';
        result ~= s[wordstart .. $];
    }
    result ~= '\n';

    return result;
}
6900
///
@safe pure unittest
{
    enum text = "a short string";

    assert(text.wrap(7) == "a short\nstring\n");

    // wrap will not break inside of a word, but at the next space
    assert(text.wrap(4) == "a\nshort\nstring\n");

    // indentation of the first line and of the following lines
    assert(text.wrap(7, "\t") == "\ta\nshort\nstring\n");
    assert(text.wrap(7, "\t", " ") == "\ta\n short\n string\n");
}
6912
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Checked at run time and during CTFE.
    assertCTFEable!(
    {
        // no words at all
        assert(string.init.wrap() == "\n");

        // surrounding whitespace is dropped
        assert(" a b df ".wrap() == "a b df\n");
        assert(" a b df ".wrap(3) == "a b\ndf\n");
        assert(" a bc df ".wrap(3) == "a\nbc\ndf\n");
        assert(" abcd df ".wrap(3) == "abcd\ndf\n");

        // text that ends inside a word
        assert("x".wrap() == "x\n");
        assert("u u".wrap() == "u u\n");
        assert("abcd".wrap(3) == "\nabcd\n");

        // tabs in firstindent are expanded when measuring
        assert("a de".wrap(10, "\t", " ", 8) == "\ta\n de\n");
    });
}
6931
/******************************************
 * Removes one level of indentation from a multi-line string.
 *
 * This uniformly outdents the text as much as possible.
 * Whitespace-only lines are converted to blank lines.
 *
 * The string is split into lines, the lines are outdented by the array
 * overload of `outdent`, and the result is joined again; unlike the array
 * overload this therefore allocates.
 *
 * Params:
 *     str = multi-line string
 *
 * Returns:
 *      outdented string
 *
 * Throws:
 *     StringException if indentation is done with different sequences
 *     of whitespace characters.
 */
S outdent(S)(S str) @safe pure
if (isSomeString!S)
{
    // Keep the terminators so that joining the lines restores the original
    // line breaks exactly.
    auto lines = str.splitLines(Yes.keepTerminator);
    return lines.outdent().join();
}
6955
///
@safe pure unittest
{
    // The token string is indented together with the surrounding code ...
    enum pretty = q{
       import std.stdio;
       void main() {
           writeln("Hello");
       }
    }.outdent();

    // ... and outdent produces the same text as writing it flush left.
    enum ugly = q{
import std.stdio;
void main() {
    writeln("Hello");
}
};

    assert(pretty == ugly);
}
6975
6976
/******************************************
 * Removes one level of indentation from an array of single-line strings.
 *
 * This uniformly outdents the text as much as possible.
 * Whitespace-only lines are converted to blank lines (only their line
 * terminator is kept). If a line that is not whitespace-only has no
 * indentation at all, nothing can be removed and the function returns
 * right away; whitespace-only lines after that line are left as they are.
 *
 * Works for immutable as well as mutable string types.
 *
 * Params:
 *     lines = array of single-line strings
 *
 * Returns:
 *      lines[] is rewritten in place with outdented lines;
 *      `null` if lines is empty
 *
 * Throws:
 *     StringException if indentation is done with different sequences
 *     of whitespace characters.
 */
S[] outdent(S)(return scope S[] lines) @safe pure
if (isSomeString!S)
{
    import std.algorithm.searching : startsWith;

    if (lines.empty)
        return null;

    // Pass 1: blank out whitespace-only lines and find the shortest
    // indentation among the remaining ones.
    S commonIndent;
    foreach (ref line; lines)
    {
        auto rest = line.stripLeft();

        if (rest.empty)
        {
            // Keep nothing but the line terminator.
            line = line[line.chomp().length .. $];
            continue;
        }

        // Declared as S (not const) so that it can be stored in
        // commonIndent for mutable string types, too.
        S indent = line[0 .. $ - rest.length];

        // Comparing number of code units instead of code points is OK here
        // because this function throws upon inconsistent indentation.
        if (commonIndent !is null && indent.length >= commonIndent.length)
            continue;

        if (indent.empty)
            return lines;   // an unindented line: nothing to strip
        commonIndent = indent;
    }

    // Pass 2: strip the common indentation from every non-blank line.
    foreach (ref line; lines)
    {
        if (line.stripLeft().empty)
            continue;

        if (!line.startsWith(commonIndent))
            throw new StringException("outdent: Inconsistent indentation");

        line = line[commonIndent.length .. $];
    }

    return lines;
}
7052
///
@safe pure unittest
{
    // Every line is indented: the common four spaces are removed.
    auto str1 = [
        "    void main()\n",
        "    {\n",
        "        test();\n",
        "    }\n"
    ];
    auto str1Expected = [
        "void main()\n",
        "{\n",
        "    test();\n",
        "}\n"
    ];
    assert(str1.outdent == str1Expected);

    // The first line is not indented, so there is nothing to remove.
    auto str2 = [
        "void main()\n",
        "    {\n",
        "        test();\n",
        "    }\n"
    ];
    assert(str2.outdent == str2);
}
7078
@safe pure unittest
{
    import std.exception : assertCTFEable;

    // Every fixture is typed as S so that each case really runs for string,
    // wstring and dstring, both at run time and (static assert) at compile time.
    assertCTFEable!(
    {

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // empty input
        enum S blank = "";
        assert(blank.outdent() == blank);
        static assert(blank.outdent() == blank);

        // only whitespace-only lines
        enum S testStr1  = " \n \t\n ";
        enum S expected1 = "\n\n";
        assert(testStr1.outdent() == expected1);
        static assert(testStr1.outdent() == expected1);

        assert(testStr1[0..$-1].outdent() == expected1);
        static assert(testStr1[0..$-1].outdent() == expected1);

        // an unindented first line leaves the text untouched
        enum S testStr2 = "a\n \t\nb";
        assert(testStr2.outdent() == testStr2);
        static assert(testStr2.outdent() == testStr2);

        // tabs, a non-BMP character and blank lines
        enum S testStr3 =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";

        enum S expected3 =
"
\tX
\U00010143X


\t\tX
";
        assert(testStr3.outdent() == expected3);
        static assert(testStr3.outdent() == expected3);

        // every supported line terminator
        enum S testStr4  = " X\r X\n X\r\n X\u2028 X\u2029 X";
        enum S expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
        assert(testStr4.outdent() == expected4);
        static assert(testStr4.outdent() == expected4);

        // same, but the last line is whitespace-only
        enum S testStr5  = testStr4[0..$-1];
        enum S expected5 = expected4[0..$-1];
        assert(testStr5.outdent() == expected5);
        static assert(testStr5.outdent() == expected5);

        // whitespace-only lines keep their terminators
        enum S testStr6 = " \r \n \r\n \u2028 \u2029";
        enum S expected6 = "\r\n\r\n\u2028\u2029";
        assert(testStr6.outdent() == expected6);
        static assert(testStr6.outdent() == expected6);

        // trailing whitespace is preserved
        enum S testStr7 = " a \n b ";
        enum S expected7 = "a \nb ";
        assert(testStr7.outdent() == expected7);
        static assert(testStr7.outdent() == expected7);
    }}
    });
}
7171
@safe pure unittest
{
    import std.exception : assertThrown;

    // The second line is indented with a tab, the others with a space.
    string mixed = " a\n\tb\n c";
    assertThrown!StringException(mixed.outdent());
}
7178
/** Assume the given array of integers `arr` is a well-formed UTF string and
return it typed as a UTF string.

`ubyte` becomes `char`, `ushort` becomes `wchar` and `uint`
becomes `dchar`. Type qualifiers are preserved.

When compiled with debug mode, this function performs an extra check to make
sure the return value is a valid Unicode string.

Params:
    arr = array of ubytes, ushorts, or uints (signed element types are
          rejected by the template constraint)

Returns:
    arr retyped as an array of chars, wchars, or dchars

Throws:
    In debug mode `AssertError`, when the result is not a well-formed UTF string.

See_Also: $(LREF representation)
*/
auto assumeUTF(T)(T[] arr)
if (staticIndexOf!(immutable T, immutable ubyte, immutable ushort, immutable uint) != -1)
{
    import std.traits : ModifyTypePreservingTQ;
    import std.exception : collectException;
    import std.utf : validate;

    // Element sizes 1, 2 and 4 give the indices 0, 1 and 2.
    alias ToUTFType(U) = AliasSeq!(char, wchar, dchar)[U.sizeof / 2];

    // Reinterpret the elements; the qualifiers of T carry over.
    auto asUTF = cast(ModifyTypePreservingTQ!(ToUTFType, T)[]) arr;

    debug
    {
        // Debug builds only: turn a validation failure into an assertion.
        scope ex = collectException(validate(asUTF));
        assert(!ex, ex.msg);
    }

    return asUTF;
}
7217
///
@safe pure unittest
{
    string original = "Hölo World";

    // Round trip: string -> raw code units -> string
    immutable(ubyte)[] raw = original.representation;
    string restored = raw.assumeUTF;

    assert(restored == "Hölo World");
}
7227
pure @system unittest
{
    import std.algorithm.comparison : equal;

    // Pair every mutable string type with the integer type of equal size.
    static foreach (i, T; AliasSeq!(char[], wchar[], dchar[]))
    {{
        alias Raw = AliasSeq!(ubyte, ushort, uint)[i];

        immutable T jti = "Hello World";
        T jt = jti.dup;

        // The same data viewed as mutable, const and immutable integers.
        auto gt  = cast(Raw[]) jt;
        auto gtc = cast(const(Raw)[]) jt;
        auto gti = cast(immutable(Raw)[]) jt;

        auto ht  = assumeUTF(gt);
        auto htc = assumeUTF(gtc);
        auto hti = assumeUTF(gti);

        assert(equal(jt, ht));
        assert(equal(jt, htc));
        assert(equal(jt, hti));
    }}
}
7263
pure @system unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown, assertNotThrown;

    // 0xC0 on its own is not valid UTF-8.
    immutable(ubyte)[] invalid = [ 0xC0 ];

    auto attempt = () nothrow @nogc @safe { cast(void) invalid.assumeUTF; };

    // The validity check exists in debug builds only.
    debug
        assertThrown!AssertError(attempt());
    else
        assertNotThrown!AssertError(attempt());
}
7276