1 // Written in the D programming language.
2
3 /**
4 String handling functions.
5
6 $(SCRIPT inhibitQuickIndex = 1;)
7
8 $(DIVC quickindex,
9 $(BOOKTABLE ,
10 $(TR $(TH Category) $(TH Functions) )
11 $(TR $(TDNW Searching)
12 $(TD
13 $(MYREF column)
14 $(MYREF indexOf)
15 $(MYREF indexOfAny)
16 $(MYREF indexOfNeither)
17 $(MYREF lastIndexOf)
18 $(MYREF lastIndexOfAny)
19 $(MYREF lastIndexOfNeither)
20 )
21 )
22 $(TR $(TDNW Comparison)
23 $(TD
24 $(MYREF isNumeric)
25 )
26 )
27 $(TR $(TDNW Mutation)
28 $(TD
29 $(MYREF capitalize)
30 )
31 )
32 $(TR $(TDNW Pruning and Filling)
33 $(TD
34 $(MYREF center)
35 $(MYREF chomp)
36 $(MYREF chompPrefix)
37 $(MYREF chop)
38 $(MYREF detabber)
39 $(MYREF detab)
40 $(MYREF entab)
41 $(MYREF entabber)
42 $(MYREF leftJustify)
43 $(MYREF outdent)
44 $(MYREF rightJustify)
45 $(MYREF strip)
46 $(MYREF stripLeft)
47 $(MYREF stripRight)
48 $(MYREF wrap)
49 )
50 )
51 $(TR $(TDNW Substitution)
52 $(TD
53 $(MYREF abbrev)
54 $(MYREF soundex)
55 $(MYREF soundexer)
56 $(MYREF succ)
57 $(MYREF tr)
58 $(MYREF translate)
59 )
60 )
61 $(TR $(TDNW Miscellaneous)
62 $(TD
63 $(MYREF assumeUTF)
64 $(MYREF fromStringz)
65 $(MYREF lineSplitter)
66 $(MYREF representation)
67 $(MYREF splitLines)
68 $(MYREF toStringz)
69 )
70 )))
71
72 Objects of types $(D _string), $(D wstring), and $(D dstring) are value types
73 and cannot be mutated element-by-element. For using mutation during building
74 strings, use $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D xxxstring)
75 types are preferable because they don't exhibit undesired aliasing, thus
76 making code more robust.
77
78 The following functions are publicly imported:
79
80 $(BOOKTABLE ,
81 $(TR $(TH Module) $(TH Functions) )
82 $(LEADINGROW Publicly imported functions)
83 $(TR $(TD std.algorithm)
84 $(TD
85 $(REF_SHORT cmp, std,algorithm,comparison)
86 $(REF_SHORT count, std,algorithm,searching)
87 $(REF_SHORT endsWith, std,algorithm,searching)
88 $(REF_SHORT startsWith, std,algorithm,searching)
89 ))
90 $(TR $(TD std.array)
91 $(TD
92 $(REF_SHORT join, std,array)
93 $(REF_SHORT replace, std,array)
94 $(REF_SHORT replaceInPlace, std,array)
95 $(REF_SHORT split, std,array)
96 $(REF_SHORT empty, std,array)
97 ))
98 $(TR $(TD std.format)
99 $(TD
100 $(REF_SHORT format, std,format)
101 $(REF_SHORT sformat, std,format)
102 ))
103 $(TR $(TD std.uni)
104 $(TD
105 $(REF_SHORT icmp, std,uni)
106 $(REF_SHORT toLower, std,uni)
107 $(REF_SHORT toLowerInPlace, std,uni)
108 $(REF_SHORT toUpper, std,uni)
109 $(REF_SHORT toUpperInPlace, std,uni)
110 ))
111 )
112
113 There is a rich set of functions for _string handling defined in other modules.
114 Functions related to Unicode and ASCII are found in $(MREF std, uni)
115 and $(MREF std, ascii), respectively. Other functions that have a
116 wider generality than just strings can be found in $(MREF std, algorithm)
117 and $(MREF std, range).
118
119 See_Also:
120 $(LIST
121 $(MREF std, algorithm) and
122 $(MREF std, range)
123 for generic range algorithms
124 ,
125 $(MREF std, ascii)
126 for functions that work with ASCII strings
127 ,
128 $(MREF std, uni)
129 for functions that work with unicode strings
130 )
131
132 Copyright: Copyright Digital Mars 2007-.
133
134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
135
136 Authors: $(HTTP digitalmars.com, Walter Bright),
137 $(HTTP erdani.org, Andrei Alexandrescu),
138 Jonathan M Davis,
139 and David L. 'SpottedTiger' Davis
140
141 Source: $(PHOBOSSRC std/_string.d)
142
143 */
144 module std.string;
145
version (unittest)
{
private:
    /*
     * Test-only wrapper around a `string`. It converts implicitly to `string`
     * through `alias this`, and its postblit is disabled, so the wrapper
     * itself cannot be copied.
     */
    struct TestAliasedString
    {
        string get() @safe @nogc pure nothrow { return _s; }
        alias get this;
        @disable this(this);
        string _s;
    }

    /*
     * Calls `func` once with `s` wrapped in a `TestAliasedString` and once
     * with the plain string, forwarding `args` both times, and returns
     * whether the two results agree.
     */
    bool testAliasedString(alias func, Args...)(string s, Args args)
    {
        import std.algorithm.comparison : equal;

        auto viaWrapper = func(TestAliasedString(s), args);
        auto viaString = func(s, args);

        static if (!is(typeof(equal(viaWrapper, viaString))))
        {
            return viaWrapper == viaString;
        }
        else
        {
            // For ranges, compare contents instead of object identity.
            return equal(viaWrapper, viaString);
        }
    }
}
173
174 public import std.format : format, sformat;
175 import std.typecons : Flag, Yes, No;
176 public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace;
177
178 import std.meta; // AliasSeq, staticIndexOf
179 import std.range.primitives; // back, ElementEncodingType, ElementType, front,
180 // hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite,
181 // isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put,
182 // save;
183 import std.traits; // isConvertibleToString, isNarrowString, isSomeChar,
184 // isSomeString, StringTypeOf, Unqual
185
186 //public imports for backward compatibility
187 public import std.algorithm.comparison : cmp;
188 public import std.algorithm.searching : startsWith, endsWith, count;
189 public import std.array : join, replace, replaceInPlace, split, empty;
190
191 /* ************* Exceptions *************** */
192
/++
    Exception thrown on errors in std.string functions.
 +/
class StringException : Exception
{
    import std.exception : basicExceptionCtors;

    /// Constructors supplied by the $(D basicExceptionCtors) mixin template.
    mixin basicExceptionCtors;
}
203
204
/++
    Views a C-style string as a D slice without copying it.

    Params:
        cString = A null-terminated c-style string.

    Returns: A D-style array of $(D char) referencing the same string. The
    returned array will retain the same type qualifiers as the input. A
    $(D null) pointer yields $(D null).

    $(RED Important Note:) The returned array is a slice of the original buffer.
    The original data is not changed and not copied.
+/
inout(char)[] fromStringz(inout(char)* cString) @nogc @system pure nothrow
{
    import core.stdc.string : strlen;

    if (!cString)
        return null;

    // The slice covers everything up to, but not including, the terminator.
    return cString[0 .. strlen(cString)];
}
220
///
@system pure unittest
{
    // A null pointer converts to a null slice.
    assert(fromStringz(null) == null);
    // The resulting slice holds the characters without a terminator.
    assert(fromStringz("foo") == "foo");
}
227
/++
    Params:
        s = A D-style string.

    Returns: A C-style null-terminated string equivalent to $(D s). $(D s)
    must not contain embedded $(D '\0')'s as any C function will treat the
    first $(D '\0') that it sees as the end of the string. If $(D s.empty) is
    $(D true), then a string containing only $(D '\0') is returned.

    $(RED Important Note:) When passing a $(D char*) to a C function, and the C
    function keeps it around for any reason, make sure that you keep a
    reference to it in your D code. Otherwise, it may become invalid during a
    garbage collection cycle and cause a nasty bug when the C code tries to use
    it.
+/
immutable(char)* toStringz(const(char)[] s) @trusted pure nothrow
out (result)
{
    import core.stdc.string : strlen;
    if (result)
    {
        // Trailing '\0's of s cannot be told apart from the terminator, so
        // the comparison is made against s with them stripped.
        auto slen = s.length;
        while (slen > 0 && s[slen-1] == 0) --slen;
        assert(strlen(result) == slen,
                "toStringz: C string is shorter than the input (embedded '\\0'?)");
        assert(result[0 .. slen] == s[0 .. slen],
                "toStringz: C string differs from the input");
    }
}
body
{
    import std.exception : assumeUnique;

    // This overload always copies. Peeking one element past the end of s[]
    // to look for an existing terminator is not reliable for arbitrary
    // slices; it could only work if s[] were known to point into read-only
    // string-literal memory.

    // Need to make a copy
    auto copy = new char[s.length + 1];
    copy[0 .. s.length] = s[];
    copy[s.length] = 0;

    return &assumeUnique(copy)[0];
}
280
/++ Ditto +/
immutable(char)* toStringz(in string s) @trusted pure nothrow
{
    if (s.empty)
        return "".ptr;

    /* Peek past end of s[], if it's 0, no conversion necessary.
     * Note that the compiler will put a 0 past the end of static
     * strings, and the storage allocator will put a 0 past the end
     * of newly allocated char[]'s.
     */
    immutable onePastEnd = s.ptr + s.length;

    // Is onePastEnd dereferenceable? A simple test: if it points to an
    // address that is a multiple of 4, conservatively assume it might be
    // the start of a new block of memory, which might be unreadable.
    // Otherwise, it's definitely pointing to valid memory.
    immutable bool safeToPeek = (cast(size_t) onePastEnd & 3) != 0;
    if (safeToPeek && *onePastEnd == 0)
        return &s[0];

    // No usable terminator in place: fall back to the copying overload.
    return toStringz(cast(const char[]) s);
}
300
///
pure nothrow @system unittest
{
    import core.stdc.string : strlen;
    import std.conv : to;

    // A string literal.
    auto cstr = toStringz("foo");
    assert(strlen(cstr) == 3);

    // A slice out of the middle of a longer string.
    const(char)[] buffer = "abbzxyzzy";
    cstr = toStringz(buffer[3 .. 5]);
    assert(strlen(cstr) == 2);

    // The empty string yields just a terminator.
    string text = "";
    cstr = toStringz(text);
    assert(*cstr == 0);

    // Strings that already end in '\0'.
    text = "\0";
    cstr = toStringz(text);
    assert(*cstr == 0);

    text = "foo\0";
    cstr = toStringz(text);
    assert(cstr[0] == 'f' && cstr[1] == 'o' && cstr[2] == 'o' && cstr[3] == 0);

    // A const-qualified empty string.
    const string emptyConst = "";
    cstr = toStringz(emptyConst);
    assert(*cstr == 0);
}
329
330
/**
    Flag indicating whether a search is case-sensitive.

    Pass $(D Yes.caseSensitive) or $(D No.caseSensitive).
 */
alias CaseSensitive = Flag!"caseSensitive";
335
/++
    Finds the first occurrence of a character in a string or character range.

    Params:
        s = string or InputRange of characters to search in correct UTF format
        c = character to search for
        startIdx = starting index to a well-formed code point
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        The code unit index of the first occurrence of $(D c) in $(D s),
        searching from $(D startIdx) when it is given, or $(D -1) if
        $(D c) is not found. A successful result is never smaller than
        $(D startIdx).
        If the parameters are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.

    Throws:
        If the sequence starting at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    See_Also: $(REF countUntil, std,algorithm,searching)
+/
ptrdiff_t indexOf(Range)(Range s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    static import std.ascii;
    static import std.uni;
    import std.utf : byDchar, byCodeUnit, UTFException, codeLength;
    alias Char = Unqual!(ElementEncodingType!Range);

    if (cs == No.caseSensitive)
    {
        ptrdiff_t pos;
        if (std.ascii.isASCII(c))
        {
            // Plain old ASCII: compare lower-cased code units directly.
            immutable lowered = cast(char) std.ascii.toLower(c);

            foreach (const unit; s.byCodeUnit())
            {
                if (lowered == std.ascii.toLower(unit))
                    return pos;
                ++pos;
            }
        }
        else
        {
            // c is a universal character: decode s and advance the index by
            // the encoded length of every character passed over.
            immutable lowered = std.uni.toLower(c);

            foreach (const decoded; s.byDchar())
            {
                if (lowered == std.uni.toLower(decoded))
                    return pos;
                pos += codeLength!Char(decoded);
            }
        }
        return -1;
    }

    // Case-sensitive search from here on.
    static if (Char.sizeof == 1 && isSomeString!Range)
    {
        if (std.ascii.isASCII(c) && !__ctfe)
        {
            // An ASCII character in a narrow string can be located with
            // memchr; this path is not taken during CTFE.
            static ptrdiff_t trustedmemchr(Range s, char c) @trusted
            {
                import core.stdc.string : memchr;
                const hit = cast(const(Char)*)memchr(s.ptr, c, s.length);
                return hit ? hit - s.ptr : -1;
            }

            return trustedmemchr(s, cast(char) c);
        }
    }

    ptrdiff_t pos;
    static if (Char.sizeof == 1)
    {
        if (c > 0x7F)
        {
            // c needs more than one UTF-8 code unit: decode while scanning.
            foreach (const decoded; s.byDchar())
            {
                if (c == decoded)
                    return pos;
                pos += codeLength!Char(decoded);
            }
        }
        else
        {
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
    }
    else static if (Char.sizeof == 2)
    {
        if (c <= 0xFFFF)
        {
            foreach (const unit; s)
            {
                if (c == unit)
                    return pos;
                ++pos;
            }
        }
        else if (c <= 0x10FFFF)
        {
            // Encode UTF-16 surrogate pair
            immutable uint offset = c - 0x10000;
            const wchar high = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
            const wchar low = cast(wchar)((offset & 0x3FF) + 0xDC00);

            for (auto units = s.byCodeUnit(); !units.empty; units.popFront())
            {
                if (high == units.front)
                {
                    units.popFront();
                    if (units.empty)    // invalid UTF - missing second of pair
                        break;
                    if (low == units.front)
                        return pos;
                    ++pos;              // account for the unit consumed above
                }
                ++pos;
            }
        }
    }
    else static if (Char.sizeof == 4)
    {
        foreach (const unit; s)
        {
            if (c == unit)
                return pos;
            ++pos;
        }
    }
    else
        static assert(0);

    return -1;
}
486
/// Ditto
ptrdiff_t indexOf(Range)(Range s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    ptrdiff_t relative;

    static if (isSomeString!(typeof(s)) ||
                (hasSlicing!(typeof(s)) && hasLength!(typeof(s))))
    {
        // The tail can be sliced off directly.
        if (startIdx >= s.length)
            return -1;
        relative = indexOf(s[startIdx .. $], c, cs);
    }
    else
    {
        // No slicing available: drop startIdx elements one at a time.
        for (size_t skipped = 0; skipped < startIdx; ++skipped)
        {
            if (s.empty)
                return -1;
            s.popFront();
        }
        relative = indexOf(s, c, cs);
    }

    // Translate the index within the tail back into an index into s.
    if (relative == -1)
        return -1;
    return relative + cast(ptrdiff_t) startIdx;
}
521
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    assert(indexOf(greeting, 'W') == 6);
    assert(indexOf(greeting, 'Z') == -1);
    // With No.caseSensitive, 'w' also matches the upper-case 'W'.
    assert(indexOf(greeting, 'w', No.caseSensitive) == 6);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    // The result is an index into the whole string, not into the tail.
    assert(indexOf(greeting, 'W', 4) == 6);
    // A start index past the end yields -1.
    assert(indexOf(greeting, 'Z', 100) == -1);
    assert(indexOf(greeting, 'w', 3, No.caseSensitive) == 6);
}
543
// Overload for types that convert to a string: forwards to the range
// overload instantiated with the underlying string type.
ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, c, cs);
}
550
// Overload with a start index for types that convert to a string: forwards
// to the range overload instantiated with the underlying string type.
ptrdiff_t indexOf(Range)(auto ref Range s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, c, startIdx, cs);
}
557
@safe pure unittest
{
    // The alias-this wrapper must give the same result as a plain string.
    assert(testAliasedString!indexOf("std/string.d", '/'));
}
562
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;
    import std.utf : byChar, byWchar, byDchar;

    // The whole body is wrapped in assertCTFEable.
    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Case-sensitive search (the default).
        assert(indexOf(cast(S) null, cast(dchar)'a') == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
        assert(indexOf(to!S("def"), cast(dchar)'f') == 2);

        // Case-insensitive search.
        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
        assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0);
        assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2);
        assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2);
        assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2);
    }

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // Same text in UTF-8, UTF-16 and UTF-32: the expected index counts
        // code units of the respective encoding.
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);

        // The same searches on ranges instead of strings.
        assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6);

        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2);
        assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7);
        assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8);

        // Searching for a character outside the BMP in a wchar range.
        assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5);
        assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1);
    }

    // A fixed-size array is accepted as well.
    char[10] fixedSizeArray = "0123456789";
    assert(indexOf(fixedSizeArray, '2') == 2);
    });
}
613
@safe pure unittest
{
    // Same wrapper check as for the plain overload, here with a start index.
    assert(testAliasedString!indexOf("std/string.d", '/', 3));
}
618
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;
    import std.utf : byCodeUnit, byChar, byWchar;

    // Ranges with a start index, including one past the end of the range.
    assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2);
    assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1);

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
        assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);

        assert((to!S("def")).indexOf(cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 1,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("def"), cast(dchar)'a', 12,
                No.caseSensitive) == -1);
        assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
                No.caseSensitive) == 3);
        assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2);

        // Start indices given as uint and ulong are accepted as well.
        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
        assert(indexOf("def", cast(char)'f', cast(uint) 2,
            No.caseSensitive) == 2);
        assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23);
        assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1,
            No.caseSensitive) == 2);
    }

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // Expected indices count code units of the respective encoding.
        assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
            == 9);
        assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
            == 7);
        assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
            == 6);
    }
}
664
/++
    Searches for substring in $(D s).

    Params:
        s = string or ForwardRange of characters to search in correct UTF format
        sub = substring to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        the index of the first occurrence of $(D sub) in $(D s) with
        respect to the start index $(D startIdx). If $(D sub) is not found,
        then $(D -1) is returned.
        If the arguments are not valid UTF, the result will still
        be in the range [-1 .. s.length], but will not be reliable otherwise.
        If $(D sub) is found the value of the returned index is at least
        $(D startIdx).

    Throws:
        If the sequence starting at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case insensitive strings where the mapping of
        tolower and toupper is not 1:1.
+/
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub,
        in CaseSensitive cs = Yes.caseSensitive)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    alias Char1 = Unqual!(ElementEncodingType!Range);

    static if (isSomeString!Range)
    {
        import std.algorithm.searching : find;

        // find returns the part of s that starts at the match, so the index
        // is the distance between the two slice pointers.
        const(Char1)[] balance;
        if (cs == Yes.caseSensitive)
        {
            balance = find(s, sub);
        }
        else
        {
            balance = find!
                ((a, b) => toLower(a) == toLower(b))
                (s, sub);
        }
        return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } ();
    }
    else
    {
        if (s.empty)
            return -1;
        if (sub.empty)
            return 0;                   // degenerate case

        import std.utf : byDchar, codeLength;
        auto subr = sub.byDchar;        // decode sub[] by dchar's
        dchar sub0 = subr.front;        // cache first character of sub[]
        subr.popFront();

        // Special case for single character search
        if (subr.empty)
            return indexOf(s, sub0, cs);

        if (cs == No.caseSensitive)
            sub0 = toLower(sub0);

        /* Classic double nested loop search algorithm
         */
        ptrdiff_t index = 0;            // count code unit index into s
        for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront())
        {
            // Keep the character exactly as it appears in s: the index must
            // advance by its encoded length, which may differ from the
            // length of its lower-case form (U+212A KELVIN SIGN takes three
            // UTF-8 code units, its lower-case 'k' only one).
            immutable dchar original = sbydchar.front;
            immutable dchar folded =
                cs == No.caseSensitive ? toLower(original) : original;

            if (folded == sub0)
            {
                auto s2 = sbydchar.save;        // why s must be a forward range
                bool matched = true;
                foreach (c; subr.save)
                {
                    s2.popFront();
                    if (s2.empty)
                        return -1;      // s ends before sub does
                    immutable differs = cs == Yes.caseSensitive
                        ? c != s2.front
                        : toLower(c) != toLower(s2.front);
                    if (differs)
                    {
                        matched = false;
                        break;
                    }
                }
                if (matched)
                    return index;
            }
            index += codeLength!Char1(original);
        }
        return -1;
    }
}
763
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // Nothing to search when the start index is at or past the end.
    if (startIdx >= s.length)
        return -1;

    // Search the tail, then translate the hit back into an index into s.
    immutable ptrdiff_t relative = indexOf(s[startIdx .. $], sub, cs);
    if (relative == -1)
        return -1;
    return relative + cast(ptrdiff_t) startIdx;
}
780
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    // The result is an index into the whole string, not into the tail.
    assert(indexOf(greeting, "Wo", 4) == 6);
    // A start index past the end yields -1.
    assert(indexOf(greeting, "Zo", 100) == -1);
    assert(indexOf(greeting, "wo", 3, No.caseSensitive) == 6);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    assert(indexOf(greeting, "Wo") == 6);
    assert(indexOf(greeting, "Zo") == -1);
    // With No.caseSensitive the case of each character is ignored.
    assert(indexOf(greeting, "wO", No.caseSensitive) == 6);
}
802
// Overload for types that are not themselves a forward range of characters
// but have an underlying string type: forwards to the overload instantiated
// with that string type.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub,
        in CaseSensitive cs = Yes.caseSensitive)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    alias Str = StringTypeOf!Range;
    return indexOf!Str(s, sub, cs);
}
811
@safe pure unittest
{
    // The alias-this wrapper must give the same result as a plain string.
    assert(testAliasedString!indexOf("std/string.d", "string"));
}
816
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    // The whole body is wrapped in assertCTFEable.
    assertCTFEable!(
    {
    // Every combination of haystack type S and needle type T.
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // Case-sensitive search (the default).
            assert(indexOf(cast(S) null, to!T("a")) == -1);
            assert(indexOf(to!S("def"), to!T("a")) == -1);
            assert(indexOf(to!S("abba"), to!T("a")) == 0);
            assert(indexOf(to!S("def"), to!T("f")) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);

            // Case-insensitive search.
            assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0);
            assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                to!T("page-break-before"), No.caseSensitive) == -1);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // Expected indices count code units of the haystack's encoding.
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
        }
    }
    });
}
869
// Substring search on a range (not a string); the test itself is marked
// @nogc and nothrow.
@safe pure @nogc nothrow
unittest
{
    import std.traits : EnumMembers;
    import std.utf : byWchar;

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // An empty haystack yields -1 even for an empty needle, while an
        // empty needle is found at index 0 of a non-empty haystack.
        assert(indexOf("".byWchar, "", cs) == -1);
        assert(indexOf("hello".byWchar, "", cs) == 0);
        assert(indexOf("hello".byWchar, "l", cs) == 2);
        assert(indexOf("heLLo".byWchar, "LL", cs) == 2);
        // Needles that run past the end of the haystack.
        assert(indexOf("hello".byWchar, "lox", cs) == -1);
        assert(indexOf("hello".byWchar, "betty", cs) == -1);
        assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7);
    }
}
887
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    // Every combination of haystack type S and needle type T, each time
    // with an explicit start index.
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(indexOf(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);

            assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1);
            assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1);
            assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3);
            assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2);
            assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3);
            assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6);
            // On failure the message shows the actual index and the types.
            assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9,
                to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive))
                ~ " " ~ S.stringof ~ " " ~ T.stringof);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(indexOf(sMars, to!T("MY fAVe"), 10,
                No.caseSensitive) == -1);
            assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7);
            assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0);
            assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17);
            assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41);
            assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0);

            assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0);

            // Thanks to Carlos Santander B. and zwang
            assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
                to!T("page-break-before"), 10, No.caseSensitive) == -1);

            // In order for indexOf with and without index to be consistent
            assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // Expected indices count code units of the haystack's encoding.
            assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
                3, cs) == 9);
            assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
                3, cs) == 7);
            assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
                3, cs) == 6);
        }
    }
}
946
/++
    Finds the last occurrence of a character in a string.

    Params:
        s = string to search
        c = character to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        The index of the last occurrence of $(D c) in $(D s). If $(D c) is not
        found, then $(D -1) is returned. The $(D startIdx) slices $(D s) in
        the following way $(D s[0 .. startIdx]). $(D startIdx) represents a
        codeunit index in $(D s).

    Throws:
        If the sequence ending at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    $(D cs) indicates whether the comparisons are case sensitive.
+/
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    static import std.ascii, std.uni;
    import std.utf : canSearchInCodeUnits;

    if (cs == No.caseSensitive)
    {
        if (std.ascii.isASCII(c))
        {
            // ASCII target: compare lower-cased code units from the back.
            immutable wanted = std.ascii.toLower(c);

            foreach_reverse (pos, unit; s)
            {
                if (wanted == std.ascii.toLower(unit))
                    return pos;
            }
        }
        else
        {
            // Non-ASCII target: decode s backwards and compare lower-cased
            // code points.
            immutable wanted = std.uni.toLower(c);

            foreach_reverse (pos, dchar decoded; s)
            {
                if (wanted == std.uni.toLower(decoded))
                    return pos;
            }
        }
        return -1;
    }

    // Case-sensitive search from here on.
    if (canSearchInCodeUnits!Char(c))
    {
        foreach_reverse (pos, unit; s)
        {
            if (unit == c)
                return pos;
        }
    }
    else
    {
        foreach_reverse (pos, dchar decoded; s)
        {
            if (decoded == c)
                return pos;
        }
    }

    return -1;
}
1027
/// Ditto
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    // A start index beyond the end of s cannot be used to slice it.
    if (startIdx > s.length)
        return -1;

    // Only the prefix s[0 .. startIdx] is searched.
    return lastIndexOf(s[0u .. startIdx], c, cs);
}
1040
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    assert(lastIndexOf(greeting, 'l') == 9);
    assert(lastIndexOf(greeting, 'Z') == -1);
    // With No.caseSensitive, 'L' also matches the lower-case 'l'.
    assert(lastIndexOf(greeting, 'L', No.caseSensitive) == 9);
}

///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    // Only greeting[0 .. 4] is searched.
    assert(lastIndexOf(greeting, 'l', 4) == 3);
    // A start index past the end yields -1.
    assert(lastIndexOf(greeting, 'Z', 1337) == -1);
    assert(lastIndexOf(greeting, 'L', 7, No.caseSensitive) == 3);
}
1062
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    // The whole body is wrapped in assertCTFEable.
    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // Case-sensitive search (the default).
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        assert(lastIndexOf(to!S("abba"), 'a') == 3);
        assert(lastIndexOf(to!S("def"), 'f') == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö') == 0);

        // Case-insensitive search.
        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3);
        assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
        assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
            No.caseSensitive) == 1);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2);
        assert(lastIndexOf(sPlts, 'M', No.caseSensitive) == 34);
        assert(lastIndexOf(sPlts, 'S', No.caseSensitive) == 40);
    }

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // Expected indices count code units of the respective encoding.
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
    });
}
1102
// lastIndexOf(s, c, startIdx[, cs]) for every string width.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        // startIdx 3 excludes the trailing character from the search
        assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
        assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);

        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        // the start index is passed as a ushort here
        assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3,
                to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive)));
        assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2);

        S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        // startIdx 4 is past the end of "def", so the result is -1
        assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1);
        assert(lastIndexOf(sPlts, 'M', sPlts.length -2, No.caseSensitive) == 34);
        assert(lastIndexOf(sPlts, 'S', sPlts.length -2, No.caseSensitive) == 40);
    }

    foreach (cs; EnumMembers!CaseSensitive)
    {
        // code unit indices differ between UTF-8, UTF-16 and UTF-32
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
}
1135
/++
    Finds the last occurrence of the substring $(D sub) in $(D s).

    Params:
        s = string to search
        sub = substring to search for
        startIdx = the index into s to start searching from
        cs = $(D Yes.caseSensitive) or $(D No.caseSensitive)

    Returns:
        the index of the last occurrence of $(D sub) in $(D s). If $(D sub) is
        not found, then $(D -1) is returned. The $(D startIdx) slices $(D s)
        in the following way $(D s[0 .. startIdx]). $(D startIdx) represents a
        codeunit index in $(D s).

    Throws:
        If the sequence ending at $(D startIdx) does not represent a well
        formed codepoint, then a $(REF UTFException, std,utf) may be thrown.

    $(D cs) indicates whether the comparisons are case sensitive.
+/
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    import std.algorithm.searching : endsWith;
    import std.conv : to;
    import std.range.primitives : walkLength;
    static import std.uni;
    import std.utf : strideBack;

    // An empty needle is never reported as found.
    if (sub.empty)
        return -1;

    // A needle of exactly one code point is handled by the character overload.
    if (walkLength(sub) == 1)
        return lastIndexOf(s, sub.front, cs);

    if (cs == Yes.caseSensitive)
    {
        static if (is(Unqual!Char1 == Unqual!Char2))
        {
            // Same code unit type: compare code units directly, no decoding.
            import core.stdc.string : memcmp;

            immutable c = sub[0];

            // When sub is longer than s the start value is negative and the
            // loop body never runs.
            for (ptrdiff_t i = s.length - sub.length; i >= 0; --i)
            {
                if (s[i] == c)
                {
                    if (__ctfe)
                    {
                        // memcmp is not usable during CTFE, so compare the
                        // remaining code units with a slice comparison. Only
                        // a complete match may return; a mismatch has to fall
                        // through to the next candidate position.
                        if (s[i + 1 .. i + sub.length] == sub[1 .. $])
                            return i;
                    }
                    else
                    {
                        auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted
                        {
                            return memcmp(s1, s2, n);
                        }
                        if (trustedMemcmp(&s[i + 1], &sub[1],
                                (sub.length - 1) * Char1.sizeof) == 0)
                            return i;
                    }
                }
            }
        }
        else
        {
            // Different code unit types: shrink s from the back one code
            // point at a time and test whether the remainder ends with sub.
            for (size_t i = s.length; !s.empty;)
            {
                if (s.endsWith(sub))
                    return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length;

                i -= strideBack(s, i);
                s = s[0 .. i];
            }
        }
    }
    else
    {
        // Case insensitive: same shrinking scheme, comparing the lower-cased
        // code points.
        for (size_t i = s.length; !s.empty;)
        {
            if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
                (s, sub))
            {
                return cast(ptrdiff_t) i - to!(const(Char1)[])(sub).length;
            }

            i -= strideBack(s, i);
            s = s[0 .. i];
        }
    }

    return -1;
}
1233
/// Ditto
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // A start index beyond the end of s cannot be sliced: report "not found".
    if (startIdx > s.length)
        return -1;

    // Only the code units in front of startIdx take part in the search.
    return lastIndexOf(s[0 .. startIdx], sub, cs);
}
1246
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    assert(lastIndexOf(s, "ll") == 2);
    // a substring that does not occur yields -1
    assert(lastIndexOf(s, "Zo") == -1);
    // ignoring case, "lL" matches "ll"
    assert(lastIndexOf(s, "lL", No.caseSensitive) == 2);
}
1257
///
@safe pure unittest
{
    import std.typecons : No;

    string s = "Hello World";
    // only s[0 .. 4] ("Hell") is searched
    assert(lastIndexOf(s, "ll", 4) == 2);
    // a start index beyond the end of s yields -1
    assert(lastIndexOf(s, "Zo", 128) == -1);
    // s[0 .. 3] ("Hel") is too short to contain the match
    assert(lastIndexOf(s, "lL", 3, No.caseSensitive) == -1);
}
1268
// Empty haystack and/or empty needle: lastIndexOf always reports -1.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        auto r = to!S("").lastIndexOf("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").lastIndexOf("");
        assert(r == -1, to!string(r));

        r = to!S("").lastIndexOf("");
        assert(r == -1, to!string(r));
    }
}
1285
// lastIndexOf(s, sub[, cs]) for every combination of haystack type S and
// needle type T; wrapped in assertCTFEable so that the checks are meant to
// hold under CTFE as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // included in the failure messages to identify the type pair
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // case sensitive (default)
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);

            // case insensitive
            assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr);

            assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0);

            S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
            S sMars = "Who\'s \'My Favorite Maritian?\'";

            assert(lastIndexOf(sMars, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr);
            assert(lastIndexOf(sPlts, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr);
            assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr);
            // a string is found in itself at index 0
            assert(lastIndexOf(sMars, to!T(sMars), No.caseSensitive) == 0, typeStr);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            // code unit indices differ between UTF-8, UTF-16 and UTF-32
            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
        }
    }
    });
}
1345
// Needles containing non-ASCII characters, for every pairing of haystack and
// needle string types.
@safe pure unittest // issue13529
{
    import std.conv : to;
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        {
            enum typeStr = S.stringof ~ " " ~ T.stringof;
            // the needle is a suffix of the haystack and must be found
            auto idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö"));
            assert(idx != -1, to!string(idx) ~ " " ~ typeStr);

            // this needle does not occur in the haystack
            idx = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd"));
            assert(idx == -1, to!string(idx) ~ " " ~ typeStr);
        }
    }
}
1362
// lastIndexOf(s, sub, startIdx[, cs]) for every combination of haystack type
// S and needle type T. Failure messages repeat the call under test so that
// the offending result is visible.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // included in the failure messages to identify the type pair
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // case sensitive (default)
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
                " " ~ to!string(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr);
            assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
                    to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472

            // case insensitive
            assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr);
            // the message repeats the asserted call (same startIdx of 4)
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~
                " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive)));
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr);
            // with and without startIdx, empty inputs must give the same result
            assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            // code unit indices differ between UTF-8, UTF-16 and UTF-32
            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
        }
    }
}
1411
/*
Common implementation of the "index of any / index of neither" searches.

Template parameters:
    forward = when true the haystack is scanned from the front, otherwise
              from the back
    any     = when true the search stops at a character that is contained in
              needles, otherwise at a character that is not contained in it

Returns the code unit index of the character that ended the search, or -1
when no character qualifies.
*/
private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)(
        const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    import std.algorithm.searching : canFind, findAmong;

    if (cs == Yes.caseSensitive)
    {
        static if (forward && any)
        {
            // findAmong yields the tail of haystack beginning at the first
            // hit; an empty tail means there was no hit.
            immutable size_t tailLength = haystack.findAmong(needles).length;
            if (tailLength == 0)
                return -1;
            return haystack.length - tailLength;
        }
        else static if (forward)
        {
            foreach (pos, dchar ch; haystack)
            {
                if (canFind(needles, ch))
                    continue;
                return pos;
            }
        }
        else static if (any)
        {
            import std.range : retro;
            import std.utf : strideBack;

            // What is left of the reversed range ends with the matching
            // code point; step back over it to obtain its start index.
            immutable size_t endOfHit =
                haystack.retro.findAmong(needles).source.length;
            if (endOfHit != 0)
                return endOfHit - haystack.strideBack(endOfHit);
        }
        else
        {
            foreach_reverse (pos, dchar ch; haystack)
            {
                if (canFind(needles, ch))
                    continue;
                return pos;
            }
        }
    }
    else
    {
        import std.range.primitives : walkLength;

        if (needles.length > 16 || needles.walkLength(17) == 0)
        {
            // Empty needles, or too many for the fixed buffer below:
            // lower-case every needle again for each haystack character.
            static bool lowerEquals(dchar needle, dchar loweredHay)
            {
                return toLower(needle) == loweredHay;
            }

            static if (forward)
            {
                foreach (pos, dchar ch; haystack)
                {
                    immutable bool isMember = canFind!lowerEquals(needles, toLower(ch));
                    if (isMember == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar ch; haystack)
                {
                    immutable bool isMember = canFind!lowerEquals(needles, toLower(ch));
                    if (isMember == any)
                        return pos;
                }
            }
        }
        else
        {
            // At most 16 code units: lower-case the needles once into a
            // fixed-size buffer.
            dchar[16] lowered = void;
            size_t count = 0;
            foreach (dchar needle; needles)
            {
                lowered[count] = toLower(needle);
                ++count;
            }

            static if (forward)
            {
                foreach (pos, dchar ch; haystack)
                {
                    immutable bool isMember = canFind(lowered[0 .. count], toLower(ch));
                    if (isMember == any)
                        return pos;
                }
            }
            else
            {
                foreach_reverse (pos, dchar ch; haystack)
                {
                    immutable bool isMember = canFind(lowered[0 .. count], toLower(ch));
                    if (isMember == any)
                        return pos;
                }
            }
        }
    }

    return -1;
}
1527
/**
    Returns the index of the first occurrence of any of the elements in $(D
    needles) in $(D haystack). If no element of $(D needles) is found,
    then $(D -1) is returned. The $(D startIdx) slices $(D haystack) in the
    following way $(D haystack[startIdx .. $]). $(D startIdx) represents a
    codeunit index in $(D haystack). If the sequence starting at $(D startIdx)
    does not represent a well formed codepoint, then a $(REF UTFException, std,utf)
    may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            $(D startIdx) is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // scan from the front for a character that is one of the needles
    enum bool scanForward = true;
    enum bool matchNeedles = true;
    return indexOfAnyNeitherImpl!(scanForward, matchNeedles)(haystack, needles, cs);
}
1551
/// Ditto
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // nothing to search when the start index is at or beyond the end
    if (startIdx >= haystack.length)
        return -1;

    // search the tail, then translate the hit back into a haystack index
    immutable ptrdiff_t offsetInTail = indexOfAny(haystack[startIdx .. $], needles, cs);
    if (offsetInTail == -1)
        return -1;

    return offsetInTail + cast(ptrdiff_t) startIdx;
}
1568
///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t i = "helloWorld".indexOfAny("Wr");
    assert(i == 5);
    // 'ö' and 'ä' take two UTF-8 code units each, so the first 'l' is at 4
    i = "öällo world".indexOfAny("lo ");
    assert(i == 4, to!string(i));
}
1579
///
@safe pure unittest
{
    import std.conv : to;

    // the search starts at code unit 4; the result is an index into the
    // whole string
    ptrdiff_t i = "helloWorld".indexOfAny("Wr", 4);
    assert(i == 5);

    // 'ö' and 'ä' take two UTF-8 code units each
    i = "Foo öällo world".indexOfAny("lh", 3);
    assert(i == 8, to!string(i));
}
1591
// Empty haystack and/or empty needles: indexOfAny always reports -1.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        auto r = to!S("").indexOfAny("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").indexOfAny("");
        assert(r == -1, to!string(r));

        r = to!S("").indexOfAny("");
        assert(r == -1, to!string(r));
    }
}
1608
// indexOfAny(haystack, needles[, cs]) for every combination of string types;
// wrapped in assertCTFEable so that the checks are meant to hold under CTFE
// as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // case sensitive (default)
            assert(indexOfAny(cast(S) null, to!T("a")) == -1);
            assert(indexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(indexOfAny(to!S("abba"), to!T("a")) == 0);
            assert(indexOfAny(to!S("def"), to!T("f")) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1);

            // case insensitive
            assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 0);
            assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive)
                == -1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"),
                No.caseSensitive) == 0);
            // more than 16 needles
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 0);

            assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
        }();
    }
    }
    );
}
1647
// indexOfAny(haystack, needles, startIdx[, cs]) for every combination of
// string types.
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // case sensitive (default)
            assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1);
            assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3);
            assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6);

            // case insensitive
            assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("DRS"), 2,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("SI"), 3,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1,
                No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2,
                No.caseSensitive) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4,
                No.caseSensitive) == 4);
            assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9,
                No.caseSensitive) == 9);

            assert(indexOfAny("\u0100", to!T("\u0100"), 0,
                No.caseSensitive) == 0);
        }();

        foreach (cs; EnumMembers!CaseSensitive)
        {
            // code unit indices differ between UTF-8, UTF-16 and UTF-32
            assert(indexOfAny("hello\U00010143\u0100\U00010143",
                to!S("e\u0100"), 3, cs) == 9);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"w,
                to!S("h\u0100"), 3, cs) == 7);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"d,
                to!S("l\u0100"), 5, cs) == 6);
        }
    }
}
1694
/**
    Returns the index of the last occurrence of any of the elements in $(D
    needles) in $(D haystack). If no element of $(D needles) is found,
    then $(D -1) is returned. The $(D stopIdx) slices $(D haystack) in the
    following way $(D haystack[0 .. stopIdx]). $(D stopIdx) represents a
    codeunit index in $(D haystack). If the sequence ending at $(D stopIdx)
    does not represent a well formed codepoint, then a
    $(REF UTFException, std,utf) may be thrown.

    Params:
        haystack = String to search for needles in.
        needles = Characters to search for in haystack.
        stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If
            $(D stopIdx) is greater than the length of haystack, the function
            returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // scan from the back for a character that is one of the needles
    enum bool scanForward = false;
    enum bool matchNeedles = true;
    return indexOfAnyNeitherImpl!(scanForward, matchNeedles)(haystack, needles, cs);
}
1719
/// Ditto
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // a stop index beyond the end cannot be sliced: report "not found"
    if (stopIdx > haystack.length)
        return -1;

    // only the code units in front of stopIdx take part in the search
    return lastIndexOfAny(haystack[0 .. stopIdx], needles, cs);
}
1733
///
@safe pure unittest
{
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo");
    assert(i == 8);

    // 'ö' and 'ä' take two UTF-8 code units each; the last 'ö' starts at 8
    i = "Foo öäöllo world".lastIndexOfAny("öF");
    assert(i == 8);
}
1743
///
@safe pure unittest
{
    import std.conv : to;

    // only "hell" is searched
    ptrdiff_t i = "helloWorld".lastIndexOfAny("Wlo", 4);
    assert(i == 3);

    // only "Foo" is searched
    i = "Foo öäöllo world".lastIndexOfAny("öF", 3);
    assert(i == 0);
}
1755
// Empty haystack and/or empty needles: lastIndexOfAny always reports -1.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        auto r = to!S("").lastIndexOfAny("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").lastIndexOfAny("");
        assert(r == -1, to!string(r));

        r = to!S("").lastIndexOfAny("");
        assert(r == -1, to!string(r));
    }
}
1772
// lastIndexOfAny(haystack, needles[, cs]) for every combination of string
// types; wrapped in assertCTFEable so that the checks are meant to hold
// under CTFE as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // case sensitive (default)
            assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6);

            // 'ö' takes two code units in a string but one in a wstring or
            // dstring, which shifts the index of the final 'f'
            ptrdiff_t oeIdx = 9;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 8;
            }

            auto foundOeIdx = lastIndexOfAny(to!S("dfeffgföf"), to!T("feg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // case insensitive
            assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("FBI"),
                No.caseSensitive) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == -1);

            // the only hit ('d') follows the leading 'ö', whose width
            // depends on the string type
            oeIdx = 2;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 1;
            }
            assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"),
                No.caseSensitive) == oeIdx);

            assert(lastIndexOfAny("\u0100", to!T("\u0100"),
                No.caseSensitive) == 0);
        }();
    }
    }
    );
}
1825
// lastIndexOfAny(haystack, needles, stopIdx[, cs]) for every combination of
// string types; wrapped in assertCTFEable so that the checks are meant to
// hold under CTFE as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // included in the failure messages to identify the type pair
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // case sensitive (default)
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0,
                typeStr);

            // case insensitive
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5,
                No.caseSensitive) == 3, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6,
                No.caseSensitive) == 5, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2,
                No.caseSensitive) == 0, typeStr);
        }();
    }
    }
    );
}
1877
/**
    Returns the index of the first occurrence of any character that is not an
    element of $(D needles) in $(D haystack). If all elements of
    $(D haystack) are elements of $(D needles), $(D -1) is returned.

    Params:
        haystack = String to search for needles in.
        needles = Characters to skip over in haystack.
        startIdx = slices haystack like this $(D haystack[startIdx .. $]). If
            $(D startIdx) is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // scan from the front for a character that is none of the needles
    enum bool scanForward = true;
    enum bool matchNeedles = false;
    return indexOfAnyNeitherImpl!(scanForward, matchNeedles)(haystack, needles, cs);
}
1898
/// Ditto
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // nothing to search when the start index is at or beyond the end
    if (startIdx >= haystack.length)
        return -1;

    // search the tail, then translate the hit back into a haystack index
    immutable ptrdiff_t offsetInTail = indexOfAnyNeitherImpl!(true, false)(
        haystack[startIdx .. $], needles, cs);
    if (offsetInTail == -1)
        return -1;

    return offsetInTail + cast(ptrdiff_t) startIdx;
}
1917
///
@safe pure unittest
{
    // the search starts at the given index; results index the whole string
    assert(indexOfNeither("abba", "a", 2) == 2);
    assert(indexOfNeither("def", "de", 1) == 2);
    assert(indexOfNeither("dfefffg", "dfe", 4) == 6);
}
1925
///
@safe pure unittest
{
    // index of the first character that is not one of the needles
    assert(indexOfNeither("def", "a") == 0);
    assert(indexOfNeither("def", "de") == 2);
    assert(indexOfNeither("dfefffg", "dfe") == 6);
}
1933
// Empty inputs: an empty haystack yields -1, while with empty needles the
// very first character already qualifies.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        auto r = to!S("").indexOfNeither("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").indexOfNeither("");
        assert(r == 0, to!string(r));

        r = to!S("").indexOfNeither("");
        assert(r == -1, to!string(r));
    }
}
1950
// indexOfNeither(haystack, needles[, cs]) for every combination of string
// types; wrapped in assertCTFEable so that the checks are meant to hold
// under CTFE as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(indexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(indexOfNeither("abba", "a") == 1);

            // case insensitive
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("def"), to!T("D"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("ABca"), to!T("a"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("def"), to!T("f"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"),
                No.caseSensitive) == 6);
            // the leading 'ä' takes two code units in a string but one in a
            // wstring or dstring, which shifts the index of the final 'g'
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 8,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 7,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
        }();
    }
    }
    );
}
1994
// indexOfNeither(haystack, needles, startIdx[, cs]) for every combination of
// string types; wrapped in assertCTFEable so that the checks are meant to
// hold under CTFE as well.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1);
            assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1,
                to!string(indexOfNeither(to!S("def"), to!T("a"), 1)));

            // case insensitive
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4,
                No.caseSensitive) == 4);
            assert(indexOfNeither(to!S("def"), to!T("D"), 2,
                No.caseSensitive) == 2);
            assert(indexOfNeither(to!S("ABca"), to!T("a"), 3,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("def"), to!T("tzf"), 2,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5,
                No.caseSensitive) == 6);
            // code unit 2 is 'D' in a string (where 'ö' takes two code
            // units) but 'f' in a wstring or dstring
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 3, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 2, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
        }();
    }
    }
    );
}
2037
/**
    Returns the index of the last occurrence of any character that is not an
    element of $(D needles) in $(D haystack). If all elements of
    $(D haystack) are elements of $(D needles), $(D -1) is returned.

    Params:
        haystack = String to search for needles in.
        needles = Characters to skip over in haystack.
        stopIdx = slices haystack like this $(D haystack[0 .. stopIdx]). If
            $(D stopIdx) is greater than or equal to the length of haystack,
            the function returns $(D -1).
        cs = Indicates whether the comparisons are case sensitive.
*/
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // scan from the back for a character that is none of the needles
    enum bool scanForward = false;
    enum bool matchNeedles = false;
    return indexOfAnyNeitherImpl!(scanForward, matchNeedles)(haystack, needles, cs);
}
2058
/// Ditto
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // a stop index at or beyond the end of haystack reports "not found"
    if (stopIdx >= haystack.length)
        return -1;

    // only the code units in front of stopIdx take part in the search
    enum bool scanForward = false;
    enum bool matchNeedles = false;
    return indexOfAnyNeitherImpl!(scanForward, matchNeedles)(
        haystack[0 .. stopIdx], needles, cs);
}
2073
///
@safe pure unittest
{
    // index of the last character that is not one of the needles
    assert(lastIndexOfNeither("abba", "a") == 2);
    assert(lastIndexOfNeither("def", "f") == 1);
}
2080
///
@safe pure unittest
{
    // a stopIdx equal to the length of the haystack yields -1
    assert(lastIndexOfNeither("def", "rsa", 3) == -1);
    // only "ab" is searched
    assert(lastIndexOfNeither("abba", "a", 2) == 1);
}
2087
// Empty inputs: an empty haystack yields -1, while with empty needles the
// very last character already qualifies.
@safe pure unittest
{
    import std.conv : to;

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        auto r = to!S("").lastIndexOfNeither("hello");
        assert(r == -1, to!string(r));

        r = to!S("hello").lastIndexOfNeither("");
        assert(r == 4, to!string(r));

        r = to!S("").lastIndexOfNeither("");
        assert(r == -1, to!string(r));
    }
}
2104
// Exercises lastIndexOfNeither (without stopIdx) for every combination of
// haystack and needle string width, at run time and at compile time.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);

            // The expected index is one higher for string, where the leading
            // 'ö' occupies two UTF-8 code units.
            ptrdiff_t oeIdx = 8;
            if (is(S == string))
            {
                oeIdx = 9;
            }

            auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive comparisons.
            assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"),
                No.caseSensitive) == 5);
            assert(lastIndexOfNeither(to!S("def"), to!T("MI6"),
                No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"),
                to!T("MI6"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"),
                No.caseSensitive) == 6, to!string(lastIndexOfNeither(
                to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"),
                No.caseSensitive) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == 6);
            assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"),
                to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive)));
        }();
    }
    }
    );
}
2149
// Exercises the stopIdx overload of lastIndexOfNeither for every combination
// of haystack and needle string width, at run time and at compile time.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        foreach (T; AliasSeq!(string, wstring, dstring))
        (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
            // A null haystack has length 0, so any stopIdx is out of range.
            assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1);
            assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);

            // The expected index is one higher for string, where the leading
            // 'ö' occupies two UTF-8 code units.
            ptrdiff_t oeIdx = 4;
            if (is(S == string))
            {
                oeIdx = 5;
            }

            auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"),
                7);
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive comparisons combined with a stop index.
            assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6,
                No.caseSensitive) == 5);
            assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2,
                No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"),
                to!T("MI6"), 2, No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6,
                No.caseSensitive) == 5, to!string(lastIndexOfNeither(
                to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3,
                No.caseSensitive) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2,
                No.caseSensitive) == 1, to!string(lastIndexOfNeither(
                to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive)));
        }();
    }
    }
    );
}
2193
/**
 * Reinterprets a string as an array of unsigned integers of the same width
 * as its code units: $(D ubyte) for $(D char), $(D ushort) for $(D wchar)
 * and $(D uint) for $(D dchar). Type qualifiers of the element type are
 * carried over to the result.
 *
 * Params:
 *     s = The string to return the _representation of.
 *
 * Returns:
 *     The _representation of the passed string.
 */
auto representation(Char)(Char[] s) @safe pure nothrow @nogc
if (isSomeChar!Char)
{
    import std.traits : ModifyTypePreservingTQ;

    // Maps a character type onto the unsigned integer type of equal size.
    template CodeUnitInt(T)
    {
        static if (T.sizeof == 1)
            alias CodeUnitInt = ubyte;
        else static if (T.sizeof == 2)
            alias CodeUnitInt = ushort;
        else
            alias CodeUnitInt = uint;
    }

    alias Rep = ModifyTypePreservingTQ!(CodeUnitInt, Char)[];
    return cast(Rep) s;
}
2212
///
@safe pure unittest
{
    string s = "hello";
    // The element type changes, the immutable qualifier is kept.
    static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
    // The result refers to the very same memory as the input.
    assert(representation(s) is cast(immutable(ubyte)[]) s);
    assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
}
2221
// Checks representation for every character width and a range of type
// qualifiers, at run time and at compile time.
@system pure unittest
{
    import std.exception : assertCTFEable;
    import std.traits : Fields;
    import std.typecons : Tuple;

    assertCTFEable!(
    {
        // Verifies both the static result type and that the result is the
        // same slice as a plain cast of the input.
        void test(Char, T)(Char[] str)
        {
            static assert(is(typeof(representation(str)) == T[]));
            assert(representation(str) is cast(T[]) str);
        }

        // Each tuple pairs a character type with its expected integer type.
        foreach (Type; AliasSeq!(Tuple!(char , ubyte ),
                                 Tuple!(wchar, ushort),
                                 Tuple!(dchar, uint )))
        {
            alias Char = Fields!Type[0];
            alias Int = Fields!Type[1];
            enum immutable(Char)[] hello = "hello";

            test!( immutable Char, immutable Int)(hello);
            test!( const Char, const Int)(hello);
            test!( Char, Int)(hello.dup);
            test!( shared Char, shared Int)(cast(shared) hello.dup);
            test!(const shared Char, const shared Int)(hello);
        }
    });
}
2252
2253
/**
 * Capitalize the first character of $(D input) and convert the rest of
 * $(D input) to lowercase.
 *
 * Params:
 *     input = The string to _capitalize.
 *
 * Returns:
 *     The capitalized string, as a newly built array.
 *
 * See_Also:
 *      $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory
 */
S capitalize(S)(S input) @trusted pure
if (isSomeString!S)
{
    import std.array : array;
    import std.uni : asCapitalized;
    import std.utf : byUTF;

    // Capitalize lazily, re-encode to the code unit type of S, then
    // materialize the result as an array.
    alias CodeUnit = ElementEncodingType!(S);
    return array(byUTF!CodeUnit(asCapitalized(input)));
}
2276
///
pure @safe unittest
{
    assert(capitalize("hello") == "Hello");
    // An already capitalized word comes back with the same contents.
    assert(capitalize("World") == "World");
}
2283
// Overload for types that are not strings themselves but have a
// StringTypeOf; forwards to the string overload above.
auto capitalize(S)(auto ref S s)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return capitalize!Str(s);
}
2289
// capitalize must also accept a type that merely converts to a string.
@safe pure unittest
{
    assert(testAliasedString!capitalize("hello"));
}
2294
// Checks capitalize for all string types, at run time and at compile time.
@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        S s1 = to!S("FoL");
        S s2;

        s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        // The result is a different array than the input.
        assert(s2 !is s1);

        s2 = capitalize(s1[0 .. 2]);
        assert(cmp(s2, "Fo") == 0);

        s1 = to!S("fOl");
        s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        assert(s2 !is s1);
        // Non-ASCII case mappings (dotless i / dotted capital I).
        s1 = to!S("\u0131 \u0130");
        s2 = capitalize(s1);
        assert(cmp(s2, "\u0049 i\u0307") == 0);
        assert(s2 !is s1);

        // Long s (U+017F) is capitalized to 'S'.
        s1 = to!S("\u017F \u0049");
        s2 = capitalize(s1);
        assert(cmp(s2, "\u0053 \u0069") == 0);
        assert(s2 !is s1);
    }
    });
}
2331
/++
    Split $(D s) into an array of lines according to the unicode standard using
    $(D '\r'), $(D '\n'), $(D "\r\n"), $(REF lineSep, std,uni),
    $(REF paraSep, std,uni), $(D U+0085) (NEL), $(D '\v')  and $(D '\f')
    as delimiters. If $(D keepTerm) is set to $(D KeepTerminator.yes), then the
    delimiter is included in the strings returned.

    Does not throw on invalid UTF; invalid sequences are simply passed
    unchanged to the output.

    Allocates memory; use $(LREF lineSplitter) for an alternative that
    does not.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

  Params:
    s = a string of $(D chars), $(D wchars), or $(D dchars), or any custom
        type that casts to a $(D string) type
    keepTerm = whether delimiter is included or not in the results
  Returns:
    array of strings, each element is a line that is a slice of $(D s)
  See_Also:
    $(LREF lineSplitter)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 +/
alias KeepTerminator = Flag!"keepTerminator"; // flag type of the keepTerm arguments
2359
/// ditto
S[] splitLines(S)(S s, in KeepTerminator keepTerm = No.keepTerminator) @safe pure
if (isSomeString!S)
{
    import std.array : appender;
    import std.uni : lineSep, paraSep;

    // Index of the first code unit of the line currently being scanned.
    size_t iStart = 0;
    auto retval = appender!(S[])();

    // Walk the string code unit by code unit; nothing is decoded, which is
    // why invalid UTF passes through untouched.
    for (size_t i; i < s.length; ++i)
    {
        switch (s[i])
        {
            case '\v', '\f', '\n':
                // The comparison yields 0 or 1, so the one-unit terminator
                // is included in the slice only when keepTerm is set.
                retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator)]);
                iStart = i + 1;
                break;

            case '\r':
                if (i + 1 < s.length && s[i + 1] == '\n')
                {
                    // "\r\n" counts as a single two-unit terminator.
                    retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]);
                    iStart = i + 2;
                    ++i;
                }
                else
                {
                    // A lone '\r' is handled like '\n'.
                    goto case '\n';
                }
                break;

            static if (s[i].sizeof == 1)
            {
                /* Manually decode:
                 *  lineSep is E2 80 A8
                 *  paraSep is E2 80 A9
                 */
                case 0xE2:
                    if (i + 2 < s.length &&
                        s[i + 1] == 0x80 &&
                        (s[i + 2] == 0xA8 || s[i + 2] == 0xA9)
                       )
                    {
                        // Three-unit terminator; skip its two trailing bytes.
                        retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 3]);
                        iStart = i + 3;
                        i += 2;
                    }
                    else
                        goto default;
                    break;
                /* Manually decode:
                 *  NEL is C2 85
                 */
                case 0xC2:
                    if (i + 1 < s.length && s[i + 1] == 0x85)
                    {
                        // Two-unit terminator; skip its trailing byte.
                        retval.put(s[iStart .. i + (keepTerm == Yes.keepTerminator) * 2]);
                        iStart = i + 2;
                        i += 1;
                    }
                    else
                        goto default;
                    break;
            }
            else
            {
                // With wider code units each of these separators is a single
                // unit and is treated exactly like '\n'.
                case lineSep:
                case paraSep:
                case '\u0085':
                    goto case '\n';
            }

            default:
                break;
        }
    }

    // Emit whatever follows the last terminator, if anything.
    if (iStart != s.length)
        retval.put(s[iStart .. $]);

    return retval.data;
}
2443
///
@safe pure nothrow unittest
{
    // Both '\n' and a lone '\r' end a line.
    string s = "Hello\nmy\rname\nis";
    assert(splitLines(s) == ["Hello", "my", "name", "is"]);
}
2450
// A 0xC2 byte that is not followed by 0x85 must not be taken for NEL.
@safe pure nothrow unittest
{
    string s = "a\xC2\x86b";
    assert(splitLines(s) == [s]);
}
2456
// Overload for types that are not strings themselves but have a
// StringTypeOf; forwards to the string overload.
auto splitLines(S)(auto ref S s, in KeepTerminator keepTerm = No.keepTerminator)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return splitLines!Str(s, keepTerm);
}
2462
// splitLines must also accept a type that merely converts to a string.
@safe pure nothrow unittest
{
    assert(testAliasedString!splitLines("hello\nworld"));
}
2467
// Checks splitLines against every supported terminator, for all string
// types, with and without kept terminators, at run time and compile time.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        auto s = to!S(
            "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~
            "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
        );
        auto lines = splitLines(s);
        assert(lines.length == 14);
        assert(lines[0] == "");
        assert(lines[1] == "peter");
        assert(lines[2] == "");
        assert(lines[3] == "paul");
        assert(lines[4] == "jerry");
        assert(lines[5] == "ice");
        assert(lines[6] == "cream");
        assert(lines[7] == "");
        assert(lines[8] == "sunday");
        // U+2030 is not a line terminator and stays inside the line.
        assert(lines[9] == "mon\u2030day");
        assert(lines[10] == "schadenfreude");
        assert(lines[11] == "kindergarten");
        assert(lines[12] == "");
        assert(lines[13] == "cookies");


        ubyte[] u = ['a', 0xFF, 0x12, 'b'];  // invalid UTF
        auto ulines = splitLines(cast(char[]) u);
        assert(cast(ubyte[])(ulines[0]) == u);

        // Same input, but every line keeps its terminator.
        lines = splitLines(s, Yes.keepTerminator);
        assert(lines.length == 14);
        assert(lines[0] == "\r");
        assert(lines[1] == "peter\n");
        assert(lines[2] == "\r");
        assert(lines[3] == "paul\r\n");
        assert(lines[4] == "jerry\u2028");
        assert(lines[5] == "ice\u2029");
        assert(lines[6] == "cream\n");
        assert(lines[7] == "\n");
        assert(lines[8] == "sunday\n");
        assert(lines[9] == "mon\u2030day\n");
        assert(lines[10] == "schadenfreude\v");
        assert(lines[11] == "kindergarten\f");
        assert(lines[12] == "\v");
        assert(lines[13] == "cookies\u0085");

        s.popBack(); // Lop-off trailing \n
        lines = splitLines(s);
        assert(lines.length == 14);
        assert(lines[9] == "mon\u2030day");

        // Without the final terminator the last line has nothing to keep.
        lines = splitLines(s, Yes.keepTerminator);
        assert(lines.length == 14);
        assert(lines[13] == "cookies");
    }
    });
}
2531
// Lazy range over the lines of an input, as constructed by lineSplitter.
// Each element is a slice of the input; when keepTerm is
// Yes.keepTerminator the slice includes the line terminator.
private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)
{
    import std.conv : unsigned;
    import std.uni : lineSep, paraSep;
private:
    Range _input;

    alias IndexType = typeof(unsigned(_input.length));
    // Sentinel stored in iStart while the current line has not been located.
    enum IndexType _unComputed = IndexType.max;
    IndexType iStart = _unComputed; // start of the current line, or _unComputed
    IndexType iEnd = 0;             // exclusive end of the slice front returns
    IndexType iNext = 0;            // where the scan for the next line begins

public:
    this(Range input)
    {
        _input = input;
    }

    static if (isInfinite!Range)
    {
        enum bool empty = false;
    }
    else
    {
        // Empty once no line is pending and the scan position has reached
        // the end of the input.
        @property bool empty()
        {
            return iStart == _unComputed && iNext == _input.length;
        }
    }

    // Returns the current line, locating it first if it has not been
    // computed since the last popFront.
    @property typeof(_input) front()
    {
        if (iStart == _unComputed)
        {
            iStart = iNext;
        Loop:
            for (IndexType i = iNext; ; ++i)
            {
                // Reaching the end of the input ends the line as well.
                if (i == _input.length)
                {
                    iEnd = i;
                    iNext = i;
                    break Loop;
                }
                switch (_input[i])
                {
                case '\v', '\f', '\n':
                    // The comparison yields 0 or 1, so the terminator is
                    // part of the slice only when keepTerm is set.
                    iEnd = i + (keepTerm == Yes.keepTerminator);
                    iNext = i + 1;
                    break Loop;

                case '\r':
                    if (i + 1 < _input.length && _input[i + 1] == '\n')
                    {
                        // "\r\n" counts as a single two-unit terminator.
                        iEnd = i + (keepTerm == Yes.keepTerminator) * 2;
                        iNext = i + 2;
                        break Loop;
                    }
                    else
                    {
                        // A lone '\r' is handled like '\n'.
                        goto case '\n';
                    }

                static if (_input[i].sizeof == 1)
                {
                    /* Manually decode:
                     *  lineSep is E2 80 A8
                     *  paraSep is E2 80 A9
                     */
                    case 0xE2:
                        if (i + 2 < _input.length &&
                            _input[i + 1] == 0x80 &&
                            (_input[i + 2] == 0xA8 || _input[i + 2] == 0xA9)
                        )
                        {
                            iEnd = i + (keepTerm == Yes.keepTerminator) * 3;
                            iNext = i + 3;
                            break Loop;
                        }
                        else
                            goto default;
                    /* Manually decode:
                     *  NEL is C2 85
                     */
                    case 0xC2:
                        if (i + 1 < _input.length && _input[i + 1] == 0x85)
                        {
                            iEnd = i + (keepTerm == Yes.keepTerminator) * 2;
                            iNext = i + 2;
                            break Loop;
                        }
                        else
                            goto default;
                }
                else
                {
                    // With wider code units each separator is a single unit
                    // and is treated exactly like '\n'.
                    case '\u0085':
                    case lineSep:
                    case paraSep:
                        goto case '\n';
                }

                default:
                    break;
                }
            }
        }
        return _input[iStart .. iEnd];
    }

    // Advances to the next line. If front was never evaluated for the
    // current line it is evaluated here so that iNext moves forward.
    void popFront()
    {
        if (iStart == _unComputed)
        {
            assert(!empty);
            front;
        }
        iStart = _unComputed;
    }

    static if (isForwardRange!Range)
    {
        // Copies the splitter state together with a saved copy of the input.
        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }
}
2663
/***********************************
 *  Split an array or sliceable range of characters into a range of lines
    using $(D '\r'), $(D '\n'), $(D '\v'), $(D '\f'), $(D "\r\n"),
    $(REF lineSep, std,uni), $(REF paraSep, std,uni) and $(D '\u0085') (NEL)
    as delimiters. If $(D keepTerm) is set to $(D Yes.keepTerminator), then the
    delimiter is included in the slices returned.

    Does not throw on invalid UTF; invalid sequences are simply passed
    unchanged to the output.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Does not allocate memory.

  Params:
    r = array of $(D chars), $(D wchars), or $(D dchars) or a sliceable range
    keepTerm = whether delimiter is included or not in the results
  Returns:
    range of slices of the input range $(D r)

  See_Also:
    $(LREF splitLines)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 */
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r)
if ((hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) ||
    isSomeString!Range) &&
    !isConvertibleToString!Range)
{
    // The returned range does the actual splitting lazily.
    alias Splitter = LineSplitter!(keepTerm, Range);
    return Splitter(r);
}
2696
///
@safe pure unittest
{
    import std.array : array;

    string s = "Hello\nmy\rname\nis";

    /* notice the call to 'array' to turn the lazy range created by
    lineSplitter comparable to the string[] created by splitLines.
    */
    assert(lineSplitter(s).array == splitLines(s));
}
2709
// Overload for types that convert to a string: the splitter is built over
// the string type the argument converts to.
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(auto ref Range r)
if (isConvertibleToString!Range)
{
    alias Splitter = LineSplitter!(keepTerm, StringTypeOf!Range);
    return Splitter(r);
}
2715
// Checks lineSplitter against every supported terminator, for all string
// types, with and without kept terminators, at run time and compile time.
@safe pure unittest
{
    import std.array : array;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        auto s = to!S(
            "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~
            "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
        );

        auto lines = lineSplitter(s).array;
        assert(lines.length == 14);
        assert(lines[0] == "");
        assert(lines[1] == "peter");
        assert(lines[2] == "");
        assert(lines[3] == "paul");
        assert(lines[4] == "jerry");
        assert(lines[5] == "ice");
        assert(lines[6] == "cream");
        assert(lines[7] == "");
        assert(lines[8] == "sunday");
        // U+2030 is not a line terminator and stays inside the line.
        assert(lines[9] == "mon\u2030day");
        assert(lines[10] == "schadenfreude");
        assert(lines[11] == "kindergarten");
        assert(lines[12] == "");
        assert(lines[13] == "cookies");


        ubyte[] u = ['a', 0xFF, 0x12, 'b'];  // invalid UTF
        auto ulines = lineSplitter(cast(char[]) u).array;
        assert(cast(ubyte[])(ulines[0]) == u);

        // Same input, but every line keeps its terminator.
        lines = lineSplitter!(Yes.keepTerminator)(s).array;
        assert(lines.length == 14);
        assert(lines[0] == "\r");
        assert(lines[1] == "peter\n");
        assert(lines[2] == "\r");
        assert(lines[3] == "paul\r\n");
        assert(lines[4] == "jerry\u2028");
        assert(lines[5] == "ice\u2029");
        assert(lines[6] == "cream\n");
        assert(lines[7] == "\n");
        assert(lines[8] == "sunday\n");
        assert(lines[9] == "mon\u2030day\n");
        assert(lines[10] == "schadenfreude\v");
        assert(lines[11] == "kindergarten\f");
        assert(lines[12] == "\v");
        assert(lines[13] == "cookies\u0085");

        s.popBack(); // Lop-off trailing \n
        lines = lineSplitter(s).array;
        assert(lines.length == 14);
        assert(lines[9] == "mon\u2030day");

        // Without the final terminator the last line has nothing to keep.
        lines = lineSplitter!(Yes.keepTerminator)(s).array;
        assert(lines.length == 14);
        assert(lines[13] == "cookies");
    }
    });
}
2781
///
@nogc @safe pure unittest
{
    auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n";
    auto lines = s.lineSplitter();
    // Expected lines, in order; the @nogc attribute shows that iterating
    // the splitter needs no GC allocation.
    static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"];
    uint i;
    foreach (line; lines)
    {
        assert(line == witness[i++]);
    }
    // Every expected line was seen exactly once.
    assert(i == witness.length);
}
2795
// lineSplitter on a string-convertible wrapper must yield the same lines
// as on the plain string.
@nogc @safe pure unittest
{
    import std.algorithm.comparison : equal;
    auto s = "std/string.d";
    auto as = TestAliasedString(s);
    assert(equal(s.lineSplitter(), as.lineSplitter()));
}
2803
// save must produce an independent copy, and a 0xC2 byte not followed by
// 0x85 must not be taken for NEL.
@safe pure unittest
{
    auto s = "line1\nline2";
    auto spl0 = s.lineSplitter!(Yes.keepTerminator);
    auto spl1 = spl0.save;
    // Advancing spl0 leaves the saved copy on the first line.
    spl0.popFront;
    assert(spl1.front ~ spl0.front == s);
    string r = "a\xC2\x86b";
    assert(r.lineSplitter.front == r);
}
2814
/++
    Removes leading whitespace, where whitespace is whatever
    $(REF isWhite, std,uni) reports as white.

    Params:
        input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters

    Returns: $(D input) with its leading whitespace removed.

    Postconditions: $(D input) and the returned value
    will share the same tail (see $(REF sameTail, std,array)).

    See_Also:
        Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation)
+/
auto stripLeft(Range)(Range input)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isInfinite!Range && !isConvertibleToString!Range)
{
    static import std.ascii;
    static import std.uni;
    import std.utf : decodeFront;

    for (; !input.empty; )
    {
        auto unit = input.front;

        if (!std.ascii.isASCII(unit))
        {
            // decodeFront consumes the code point, so remember the position
            // before it in case the code point turns out not to be white.
            auto beforeDecode = input.save;
            if (!std.uni.isWhite(decodeFront(input)))
                return beforeDecode;
            continue;
        }

        // ASCII fast path: no decoding required.
        if (!std.ascii.isWhite(unit))
            return input;
        input.popFront();
    }
    return input;
}
2857
///
@safe pure unittest
{
    import std.uni : lineSep, paraSep;
    // Only the leading whitespace goes away; the trailing part is kept.
    assert(stripLeft(" hello world ") ==
           "hello world ");
    assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
           "hello world\n\t\v\r");
    assert(stripLeft("hello world") ==
           "hello world");
    // Unicode line and paragraph separators are stripped as well.
    assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
           "hello world" ~ [lineSep]);
    assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
           "hello world" ~ [paraSep]);

    // Works on ranges of characters too, not just on strings.
    import std.array : array;
    import std.utf : byChar;
    assert(stripLeft(" hello world "w.byChar).array ==
           "hello world ");
}
2878
// Overload for types that convert to a string; forwards to the range
// overload instantiated with the string type.
auto stripLeft(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripLeft!Str(str);
}
2884
// stripLeft must also accept a type that merely converts to a string.
@safe pure unittest
{
    assert(testAliasedString!stripLeft(" hello"));
}
2889
2890 /++
2891 Strips trailing whitespace (as defined by $(REF isWhite, std,uni)).
2892
2893 Params:
2894 str = string or random access range of characters
2895
2896 Returns:
2897 slice of $(D str) stripped of trailing whitespace.
2898
2899 See_Also:
2900 Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
2901 +/
auto stripRight(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    import std.uni : isWhite;
    // Code unit type of the input, without qualifiers.
    alias C = Unqual!(ElementEncodingType!(typeof(str)));

    static if (isSomeString!(typeof(str)))
    {
        import std.utf : codeLength;

        // Decode code points from the back; i is the index of the first
        // code unit of c, so the slice ends just past the last non-white
        // code point.
        foreach_reverse (i, dchar c; str)
        {
            if (!isWhite(c))
                return str[0 .. i + codeLength!C(c)];
        }

        // Nothing but whitespace (or empty input).
        return str[0 .. 0];
    }
    else
    {
        // Random access range of code units: scan backwards by hand, one
        // strategy per code unit width.
        size_t i = str.length;
        while (i--)
        {
            static if (C.sizeof == 4)
            {
                // Each element is a complete code point.
                if (isWhite(str[i]))
                    continue;
                break;
            }
            else static if (C.sizeof == 2)
            {
                auto c2 = str[i];
                if (c2 < 0xD800 || c2 >= 0xE000)
                {
                    // Not a surrogate: a complete code point on its own.
                    if (isWhite(c2))
                        continue;
                }
                else if (c2 >= 0xDC00)
                {
                    // Low surrogate: combine it with a preceding high
                    // surrogate, if there is one, to get the code point.
                    if (i)
                    {
                        immutable c1 = str[i - 1];
                        if (c1 >= 0xD800 && c1 < 0xDC00)
                        {
                            immutable dchar c = ((c1 - 0xD7C0) << 10) + (c2 - 0xDC00);
                            if (isWhite(c))
                            {
                                // Skip the high surrogate as well.
                                --i;
                                continue;
                            }
                        }
                    }
                }
                break;
            }
            else static if (C.sizeof == 1)
            {
                import std.utf : byDchar;

                char cx = str[i];
                if (cx <= 0x7F)
                {
                    // Single-byte (ASCII) code point.
                    if (isWhite(cx))
                        continue;
                    break;
                }
                else
                {
                    // Number of bytes collected for the current sequence.
                    size_t stride = 0;

                    // Walk backwards until a lead byte (top two bits set),
                    // the start of the input, or four bytes are collected;
                    // also stop before stepping onto an ASCII byte.
                    while (1)
                    {
                        ++stride;
                        if (!i || (cx & 0xC0) == 0xC0 || stride == 4)
                            break;
                        cx = str[i - 1];
                        if (!(cx & 0x80))
                            break;
                        --i;
                    }

                    // Decode the collected bytes and keep them if the
                    // result is not whitespace.
                    if (!str[i .. i + stride].byDchar.front.isWhite)
                        return str[0 .. i + stride];
                }
            }
            else
                static assert(0);
        }

        // If the loop ran out, i wrapped around to size_t.max, so i + 1 is
        // 0 and the result is empty; after a break, i is the index of the
        // last element to keep.
        return str[0 .. i + 1];
    }
}
2997
///
@safe pure
unittest
{
    import std.uni : lineSep, paraSep;
    // Only the trailing whitespace goes away; the leading part is kept.
    assert(stripRight(" hello world ") ==
           " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r") ==
           "\n\t\v\rhello world");
    assert(stripRight("hello world") ==
           "hello world");
    // Unicode line and paragraph separators are stripped as well.
    assert(stripRight([lineSep] ~ "hello world" ~ lineSep) ==
           [lineSep] ~ "hello world");
    assert(stripRight([paraSep] ~ "hello world" ~ paraSep) ==
           [paraSep] ~ "hello world");
}
3014
// Overload for types that convert to a string; forwards to the overload
// instantiated with the string type.
auto stripRight(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripRight!Str(str);
}
3020
// stripRight must also accept a type that merely converts to a string.
@safe pure unittest
{
    assert(testAliasedString!stripRight("hello "));
}
3025
// Exercises the non-string (range of code units) branch of stripRight.
@safe pure unittest
{
    import std.array : array;
    import std.uni : lineSep, paraSep;
    import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings;
    assert(stripRight(" hello world ".byChar).array == " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r"w.byWchar).array == "\n\t\v\rhello world"w);
    assert(stripRight("hello world"d.byDchar).array == "hello world"d);
    // A multi-byte non-white character in front of a trailing separator.
    assert(stripRight("\u2028hello world\u2020\u2028".byChar).array == "\u2028hello world\u2020");
    // A trailing surrogate pair that is not whitespace is kept.
    assert(stripRight("hello world\U00010001"w.byWchar).array == "hello world\U00010001"w);

    // Invalid UTF input: only checks that the calls complete; the results
    // are discarded.
    foreach (C; AliasSeq!(char, wchar, dchar))
    {
        foreach (s; invalidUTFstrings!C())
        {
            cast(void) stripRight(s.byUTF!C).array;
        }
    }

    // A stray continuation byte and an unpaired low surrogate.
    cast(void) stripRight("a\x80".byUTF!char).array;
    wstring ws = ['a', cast(wchar) 0xDC00];
    cast(void) stripRight(ws.byUTF!wchar).array;
}
3049
3050
/++
    Removes whitespace (as defined by $(REF isWhite, std,uni)) from both
    the front and the back of $(D str).

    Params:
        str = string or random access range of characters

    Returns:
        slice of $(D str) stripped of leading and trailing whitespace.

    See_Also:
        Generic stripping on ranges: $(REF _strip, std, algorithm, mutation)
+/
auto strip(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    // Trim the front first, then trim the back of what remains.
    auto leftTrimmed = stripLeft(str);
    return stripRight(leftTrimmed);
}
3072
///
@safe pure unittest
{
    import std.uni : lineSep, paraSep;
    // Whitespace is removed on both sides.
    assert(strip(" hello world ") ==
           "hello world");
    assert(strip("\n\t\v\rhello world\n\t\v\r") ==
           "hello world");
    assert(strip("hello world") ==
           "hello world");
    // Unicode line and paragraph separators are stripped as well.
    assert(strip([lineSep] ~ "hello world" ~ [lineSep]) ==
           "hello world");
    assert(strip([paraSep] ~ "hello world" ~ [paraSep]) ==
           "hello world");
}
3088
// Overload for types that convert to a string; forwards to the overload
// instantiated with the string type.
auto strip(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return strip!Str(str);
}
3094
// strip must also accept a type that merely converts to a string.
@safe pure unittest
{
    assert(testAliasedString!strip(" hello world "));
}
3099
// Checks stripLeft, stripRight and strip for all string types and
// mutability variants, at run time and at compile time.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    foreach (S; AliasSeq!( char[], const char[], string,
                          wchar[], const wchar[], wstring,
                          dchar[], const dchar[], dstring))
    {
        assert(equal(stripLeft(to!S(" foo\t ")), "foo\t "));
        assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007"));
        assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r"));
        assert(equal(stripLeft(to!S("1")), "1"));
        // A non-white character outside the BMP is left alone.
        assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(stripLeft(to!S("")), ""));

        assert(equal(stripRight(to!S(" foo\t ")), " foo"));
        assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo"));
        assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB"));
        assert(equal(stripRight(to!S("1")), "1"));
        assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(stripRight(to!S("")), ""));

        assert(equal(strip(to!S(" foo\t ")), "foo"));
        assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo"));
        assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB"));
        assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(strip(to!S("")), ""));
    }
    });
}
3134
// Even for an all-whitespace input, stripLeft must share the tail of the
// input and stripRight must share its head.
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;
    assertCTFEable!(
    {
    wstring s = " ";
    assert(s.sameTail(s.stripLeft()));
    assert(s.sameHead(s.stripRight()));
    });
}
3146
3147
/++
    If $(D str) ends with $(D delimiter), then $(D str) is returned without
    $(D delimiter) on its end. If $(D str) does $(I not) end with
    $(D delimiter), then it is returned unchanged.

    If no $(D delimiter) is given, then one trailing  $(D '\r'), $(D '\n'),
    $(D "\r\n"), $(D '\f'), $(D '\v'), $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni)
    is removed from the end of $(D str). If $(D str) does not end with any of those characters,
    then it is returned unchanged.

    Params:
        str = string or indexable range of characters
        delimiter = string of characters to be sliced off end of str[]

    Returns:
        slice of str
  +/
Range chomp(Range)(Range str)
if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    if (str.empty)
        return str;

    alias C = ElementEncodingType!Range;

    // Dispatch on the last code unit only; nothing is decoded.
    switch (str[$ - 1])
    {
        case '\n':
        {
            // "\r\n" is removed as a unit.
            if (str.length > 1 && str[$ - 2] == '\r')
                return str[0 .. $ - 2];
            // A lone '\n' continues into the single-unit case below.
            goto case;
        }
        case '\r', '\v', '\f':
            return str[0 .. $ - 1];

        // Pop off the last character if lineSep, paraSep, or nelSep
        static if (is(C : const char))
        {
            /* Manually decode:
             *  lineSep is E2 80 A8
             *  paraSep is E2 80 A9
             */
            case 0xA8: // Last byte of lineSep
            case 0xA9: // Last byte of paraSep
                // Only a match if the two preceding bytes complete the
                // three-byte sequence.
                if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2)
                    return str [0 .. $ - 3];
                goto default;

            /* Manually decode:
             *  NEL is C2 85
             */
            case 0x85:
                if (str.length > 1 && str[$ - 2] == 0xC2)
                    return str [0 .. $ - 2];
                goto default;
        }
        else
        {
            // With wider code units each separator is a single unit.
            case lineSep:
            case paraSep:
            case nelSep:
                return str[0 .. $ - 1];
        }
        default:
            return str;
    }
}
3219
/// Ditto
Range chomp(Range, C2)(Range str, const(C2)[] delimiter)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    // An empty delimiter means "remove one trailing line terminator".
    if (delimiter.empty)
        return chomp(str);

    alias C1 = ElementEncodingType!Range;

    static if (is(Unqual!C1 == Unqual!C2) && (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4)))
    {
        // Same code unit type on both sides: compare the ends directly and
        // slice the delimiter off by its length.
        import std.algorithm.searching : endsWith;
        if (str.endsWith(delimiter))
            return str[0 .. $ - delimiter.length];
        return str;
    }
    else
    {
        // Kept so the untouched input can be returned on a mismatch.
        auto orig = str.save;

        static if (isSomeString!Range)
            alias C = dchar;    // because strings auto-decode
        else
            alias C = C1;       // and ranges do not

        // Compare from the back, element by element, consuming str as long
        // as it keeps matching the delimiter.
        foreach_reverse (C c; delimiter)
        {
            if (str.empty || str.back != c)
                return orig;

            str.popBack();
        }

        return str;
    }
}
3259
///
@safe pure
unittest
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : decode;
    // Only one trailing terminator is removed; "\r\n" counts as one.
    assert(chomp(" hello world \n\r") == " hello world \n");
    assert(chomp(" hello world \r\n") == " hello world ");
    assert(chomp(" hello world \f") == " hello world ");
    assert(chomp(" hello world \v") == " hello world ");
    assert(chomp(" hello world \n\n") == " hello world \n");
    assert(chomp(" hello world \n\n ") == " hello world \n\n ");
    assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n");
    assert(chomp(" hello world") == " hello world");
    assert(chomp("") == "");

    // With an explicit delimiter only that exact suffix is removed.
    assert(chomp(" hello world", "orld") == " hello w");
    assert(chomp(" hello world", " he") == " hello world");
    assert(chomp("", "hello") == "");

    // Don't decode pointlessly
    assert(chomp("hello\xFE", "\r") == "hello\xFE");
}
3285
StringTypeOf!Range chomp(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return chomp!Str(str);
}
3291
StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    // Forward to the delimiter overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return chomp!(Str, C2)(str, delimiter);
}
3297
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const withoutDelimiter = testAliasedString!chomp(" hello world \n\r");
    assert(withoutDelimiter);

    const withDelimiter = testAliasedString!chomp(" hello world", "orld");
    assert(withDelimiter);
}
3303
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    string s;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert((cast(S) null).chomp is null);
            assert("hello".to!S.chomp == "hello");
            assert("hello\n".to!S.chomp == "hello");
            assert("hello\r".to!S.chomp == "hello");
            assert("hello\r\n".to!S.chomp == "hello");
            assert("hello\n\r".to!S.chomp == "hello\n");
            assert("hello\n\n".to!S.chomp == "hello\n");
            assert("hello\r\r".to!S.chomp == "hello\r");
            assert("hello\nxxx\n".to!S.chomp == "hello\nxxx");
            assert("hello\u2028".to!S.chomp == "hello");
            assert("hello\u2029".to!S.chomp == "hello");
            assert("hello\u0085".to!S.chomp == "hello");
            assert("hello\u2028\u2028".to!S.chomp == "hello\u2028");
            assert("hello\u2029\u2029".to!S.chomp == "hello\u2029");
            // Code points that merely share trailing code units with a separator stay
            assert("hello\u2029\u2129".to!S.chomp == "hello\u2029\u2129");
            assert("hello\u2029\u0185".to!S.chomp == "hello\u2029\u0185");

            foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
                // @@@ BUG IN COMPILER, MUST INSERT CAST
                assert((cast(S) null).chomp(cast(T) null) is null);
                assert("hello\n".to!S.chomp(cast(T) null) == "hello");
                assert("hello".to!S.chomp("o".to!T) == "hell");
                assert("hello".to!S.chomp("p".to!T) == "hello");
                // @@@ BUG IN COMPILER, MUST INSERT CAST
                assert("hello".to!S.chomp(cast(T) null) == "hello");
                assert("hello".to!S.chomp("llo".to!T) == "he");
                assert("\uFF28ello".to!S.chomp("llo".to!T) == "\uFF28e");
                assert("\uFF28el\uFF4co".to!S.chomp("l\uFF4co".to!T) == "\uFF28e");
            }();
        }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;
    assert("hello world\r\n" .byChar .chomp.array == "hello world");
    assert("hello world\r\n"w.byWchar.chomp.array == "hello world"w);
    assert("hello world\r\n"d.byDchar.chomp.array == "hello world"d);

    assert("hello world"d.byDchar.chomp("ld").array == "hello wor"d);

    assert("hello\u2020" .byChar .chomp("\u2020").array == "hello");
    assert("hello\u2020"d.byDchar.chomp("\u2020"d).array == "hello"d);
}
3361
3362
/++
    If $(D str) starts with $(D delimiter), then the part of $(D str) following
    $(D delimiter) is returned. If $(D str) does $(I not) start with
    $(D delimiter), then it is returned unchanged.

    Params:
        str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        delimiter = string of characters to be sliced off front of str[]

    Returns:
        slice of str
 +/
Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter)
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    alias Unit = ElementEncodingType!Range;

    // Both sides use the same code unit type and the haystack can be sliced:
    // compare directly and drop the prefix with a slice.
    enum bool sliceable = is(Unqual!Unit == Unqual!C2) &&
        (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4));

    static if (sliceable)
    {
        import std.algorithm.searching : startsWith;
        if (!str.startsWith(delimiter))
            return str;
        return str[delimiter.length .. $];
    }
    else
    {
        // Strings auto-decode to dchar when iterated with front/popFront;
        // other ranges yield their own element type.
        static if (isSomeString!Range)
            alias Probe = dchar;
        else
            alias Probe = Unit;

        // Kept so a partial match can be undone.
        auto untouched = str.save;

        foreach (Probe wanted; delimiter)
        {
            if (str.empty)
                return untouched;
            if (str.front != wanted)
                return untouched;

            str.popFront();
        }

        return str;
    }
}
3412
///
@safe pure unittest
{
    // A matching prefix is removed...
    assert("hello world".chompPrefix("he") == "llo world");
    assert("hello world".chompPrefix("hello w") == "orld");
    // ...anything else leaves the input untouched
    assert("hello world".chompPrefix(" world") == "hello world");
    assert("".chompPrefix("hello") == "");
}
3421
StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return chompPrefix!(Str, C2)(str, delimiter);
}
3427
@safe pure
unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;
    assertCTFEable!(
    {
        // Every combination of haystack and delimiter string type
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
                assert("abcdefgh".to!S.chompPrefix("abcde".to!T).equal("fgh"));
                assert("abcde".to!S.chompPrefix("abcdefgh".to!T).equal("abcde"));
                assert("\uFF28el\uFF4co".to!S.chompPrefix("\uFF28el\uFF4co".to!T).equal(""));
                assert("\uFF28el\uFF4co".to!S.chompPrefix("\uFF28el".to!T).equal("\uFF4co"));
                assert("\uFF28el".to!S.chompPrefix("\uFF28el\uFF4co".to!T).equal("\uFF28el"));
            }();
        }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;
    assert("hello world" .byChar .chompPrefix("hello"d).array == " world");
    assert("hello world"w.byWchar.chompPrefix("hello" ).array == " world"w);
    assert("hello world"d.byDchar.chompPrefix("hello"w).array == " world"d);
    assert("hello world"c.byDchar.chompPrefix("hello"w).array == " world"d);

    assert("hello world"d.byDchar.chompPrefix("lx").array == "hello world"d);
    assert("hello world"d.byDchar.chompPrefix("hello world xx").array == "hello world"d);

    assert("\u2020world" .byChar .chompPrefix("\u2020").array == "world");
    assert("\u2020world"d.byDchar.chompPrefix("\u2020"d).array == "world"d);
}
3463
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!chompPrefix("hello world", "hello");
    assert(ok);
}
3468
/++
    Returns $(D str) without its last character, if there is one. If $(D str)
    ends with $(D "\r\n"), then both are removed. If $(D str) is empty, then
    it is returned unchanged.

    Params:
        str = string or bidirectional range of characters (must be valid UTF)
    Returns:
        slice of str; for non-string ranges, $(D str) with the trailing
        code units of its last character popped off
 +/

Range chop(Range)(Range str)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    if (str.empty)
        return str;

    static if (isSomeString!Range)
    {
        // "\r\n" counts as a single character and is removed as a unit
        if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        // popBack on a string removes a whole code point
        str.popBack();
        return str;
    }
    else
    {
        alias C = Unqual!(ElementEncodingType!Range);
        C c = str.back;
        str.popBack();
        if (c == '\n')
        {
            // Swallow the '\r' of a "\r\n" pair as well
            if (!str.empty && str.back == '\r')
                str.popBack();
            return str;
        }
        // Pop back a dchar, not just a code unit
        static if (C.sizeof == 1)
        {
            // UTF-8: keep popping while the unit just removed was a
            // continuation byte (10xxxxxx); never more than 4 units in total
            int cnt = 1;
            while ((c & 0xC0) == 0x80)
            {
                if (str.empty)
                    break;
                c = str.back;
                str.popBack();
                if (++cnt > 4)
                    break;
            }
        }
        else static if (C.sizeof == 2)
        {
            // UTF-16: the final unit of a surrogate pair is a low surrogate
            // (0xDC00 .. 0xDFFF), so the whole surrogate block must be tested,
            // not only the high half; otherwise the leading high surrogate
            // would be left dangling at the end of the result.
            if (c >= 0xD800 && c <= 0xDFFF)
            {
                if (!str.empty)
                    str.popBack();
            }
        }
        else static if (C.sizeof == 4)
        {
            // UTF-32: one code unit is one code point, nothing more to pop
        }
        else
            static assert(0);
        return str;
    }
}
3536
///
@safe pure unittest
{
    // Ordinary characters: exactly one is removed
    assert("hello world".chop == "hello worl");
    assert("Walter Bright".chop == "Walter Brigh");
    // Single line terminators are just characters
    assert("hello world\n".chop == "hello world");
    assert("hello world\r".chop == "hello world");
    assert("hello world\n\r".chop == "hello world\n");
    // "\r\n" is removed as a pair
    assert("hello world\r\n".chop == "hello world");
    assert("".chop == "");
}
3548
StringTypeOf!Range chop(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return chop!Str(str);
}
3554
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!chop("hello world");
    assert(ok);
}
3559
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings;

    assert("hello world".byChar.chop.array == "hello worl");
    assert("hello world\n"w.byWchar.chop.array == "hello world"w);
    assert("hello world\r"d.byDchar.chop.array == "hello world"d);
    assert("hello world\n\r".byChar.chop.array == "hello world\n");
    assert("hello world\r\n"w.byWchar.chop.array == "hello world"w);
    assert("Walter Bright"d.byDchar.chop.array == "Walter Brigh"d);
    assert("".byChar.chop.array == "");

    // Multi-unit characters are removed whole, even when iterating code units
    assert(`ミツバチと科学者` .byCodeUnit.chop.array == "ミツバチと科学");
    assert(`ミツバチと科学者`w.byCodeUnit.chop.array == "ミツバチと科学"w);
    assert(`ミツバチと科学者`d.byCodeUnit.chop.array == "ミツバチと科学"d);

    // Invalid UTF only has to be walked without failing; the result is not checked
    auto narrowSamples = invalidUTFstrings!char();
    foreach (sample; narrowSamples)
    {
        foreach (unit; sample.byCodeUnit.chop)
        {
        }
    }

    auto wideSamples = invalidUTFstrings!wchar();
    foreach (sample; wideSamples)
    {
        foreach (unit; sample.byCodeUnit.chop)
        {
        }
    }
}
3593
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Same expectations for every mutable and immutable string type
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            assert((cast(S) null).chop is null);
            assert("hello".to!S.chop.equal("hell"));
            assert("hello\r\n".to!S.chop.equal("hello"));
            assert("hello\n\r".to!S.chop.equal("hello\n"));
            assert("Verité".to!S.chop.equal("Verit"));
            assert(`さいごの果実`.to!S.chop.equal("さいごの果"));
            assert(`ミツバチと科学者`.to!S.chop.equal("ミツバチと科学"));
        }
    });
}
3614
3615
/++
    Left justify $(D s) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D s) doesn't fill.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad end up to $(D width) characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF leftJustifier), which does not allocate
 +/
S leftJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialise the lazy justifier into a freshly allocated array
    return array(leftJustifier(s, width, fillChar));
}
3638
///
@safe pure unittest
{
    assert("hello".leftJustify(7, 'X') == "helloXX");
    // A field narrower than the text never truncates it
    assert("hello".leftJustify(2, 'X') == "hello");
    assert("hello".leftJustify(9, 'X') == "helloXXXX");
}
3646
/++
    Left justify $(D s) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D s) doesn't fill.

    Params:
        r = string or range of characters
        width = minimum field width
        fillChar = used to pad end up to $(D width) characters

    Returns:
        a lazy range of the left justified result

    See_Also:
        $(LREF rightJustifier)
 +/

auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias Unit = Unqual!(ElementEncodingType!Range);

    static if (Unit.sizeof == 4)
    {
        // The real work happens on whole code points
        static struct Result
        {
        private:
            Range _source;
            size_t _minWidth;
            dchar _pad;
            size_t _emitted;    // number of elements popped so far

        public:

            @property bool empty()
            {
                // Not done until the field is full and the source is drained
                if (_emitted < _minWidth)
                    return false;
                return _source.empty;
            }

            @property Unit front()
            {
                // Once the source runs dry, keep handing out the fill character
                return _source.empty ? _pad : _source.front;
            }

            void popFront()
            {
                ++_emitted;
                if (!_source.empty)
                    _source.popFront();
            }

            static if (isForwardRange!Range)
            {
                @property typeof(this) save() return scope
                {
                    auto copy = this;
                    copy._source = _source.save;
                    return copy;
                }
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (Unit.sizeof == 2)
    {
        // UTF-16 input: justify as code points, then re-encode
        import std.utf : byDchar, byWchar;
        return leftJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (Unit.sizeof == 1)
    {
        // UTF-8 input: justify as code points, then re-encode
        import std.utf : byDchar, byChar;
        return leftJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
3725
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    // Text already wider than the field is passed through untouched
    assert(leftJustifier("hello", 2).equal("hello".byChar));
    // A width of 7 needs two fill characters after the five of "hello"
    assert(leftJustifier("hello", 7).equal("hello  ".byChar));
    assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar));
}
3736
auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return leftJustifier!Str(r, width, fillChar);
}
3742
@safe pure unittest
{
    // A saved copy must not advance together with the range it came from
    auto live = "hello".leftJustifier(8);
    live.popFront();
    auto snapshot = live.save;
    live.popFront();
    assert(live.front == 'l');
    assert(snapshot.front == 'e');
}
3752
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!leftJustifier("hello", 2);
    assert(ok);
}
3757
/++
    Right justify $(D s) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D s) doesn't fill.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad the front up to $(D width) characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF rightJustifier), which does not allocate
 +/
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialise the lazy justifier into a freshly allocated array
    return array(rightJustifier(s, width, fillChar));
}
3780
///
@safe pure unittest
{
    assert("hello".rightJustify(7, 'X') == "XXhello");
    // A field narrower than the text never truncates it
    assert("hello".rightJustify(2, 'X') == "hello");
    assert("hello".rightJustify(9, 'X') == "XXXXhello");
}
3788
/++
    Right justify $(D s) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D s) doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad the front up to $(D width) characters

    Returns:
        a lazy range of the right justified result

    See_Also:
        $(LREF leftJustifier)
 +/

auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias Unit = Unqual!(ElementEncodingType!Range);

    static if (Unit.sizeof == 4)
    {
        // The real work happens on whole code points
        static struct Result
        {
        private:
            Range _source;
            // Holds the requested width until prime() has run; afterwards it
            // holds the number of fill characters still to be emitted.
            // (One field reused instead of a union, which CTFE cannot handle.)
            size_t _pending;
            dchar _pad;
            bool _primed;

            // Lazy initialization so constructor is trivial and cannot fail
            void prime()
            {
                assert(_pending);
                static if (hasLength!Range)
                {
                    immutable have = _source.length;
                    _pending = (_pending > have) ? _pending - have : 0;
                }
                else
                {
                    // Look ahead (at most `width` elements) to see how many
                    // fill characters are needed
                    import std.range : take;
                    import std.range.primitives : walkLength;
                    _pending = _pending - walkLength(_source.save.take(_pending), _pending);
                }
                _primed = true;
            }

        public:
            this(Range input, size_t width, dchar fillChar) pure nothrow
            {
                _source = input;
                _pad = fillChar;
                _pending = width;
            }

            @property bool empty()
            {
                return !_pending && _source.empty;
            }

            @property Unit front()
            {
                if (_pending == 0)
                    return _source.front;   // fast path: no padding left
                if (!_primed)
                    prime();
                if (_pending != 0)
                    return _pad;
                return _source.front;
            }

            void popFront()
            {
                if (_pending == 0)
                {
                    _source.popFront();     // fast path: no padding left
                    return;
                }
                if (!_primed)
                    prime();
                if (_pending != 0)
                    --_pending;
                else
                    _source.popFront();
            }

            @property typeof(this) save()
            {
                auto copy = this;
                copy._source = _source.save;
                return copy;
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (Unit.sizeof == 2)
    {
        // UTF-16 input: justify as code points, then re-encode
        import std.utf : byDchar, byWchar;
        return rightJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (Unit.sizeof == 1)
    {
        // UTF-8 input: justify as code points, then re-encode
        import std.utf : byDchar, byChar;
        return rightJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
3905
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    // Text already wider than the field is passed through untouched
    assert(rightJustifier("hello", 2).equal("hello".byChar));
    // A width of 7 needs two fill characters in front of the five of "hello"
    assert(rightJustifier("hello", 7).equal("  hello".byChar));
    assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar));
}
3916
auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return rightJustifier!Str(r, width, fillChar);
}
3922
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!rightJustifier("hello", 2);
    assert(ok);
}
3927
@safe pure unittest
{
    // One pad character, then the text; a saved copy advances independently
    auto padded = "hello"d.rightJustifier(6);
    padded.popFront();
    auto snapshot = padded.save;
    padded.popFront();
    assert(padded.front == 'e');
    assert(snapshot.front == 'h');

    // Two pad characters in front of a narrow string
    auto narrow = "hello".rightJustifier(7);
    narrow.popFront();
    assert(narrow.front == ' ');
    narrow.popFront();
    assert(narrow.front == 'h');

    // Width equal to the length: popping without ever calling front must work
    auto exact = "hello"d.rightJustifier(5);
    foreach (_; 0 .. 3)
        exact.popFront();
}
3948
/++
    Center $(D s) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D s) doesn't fill.

    Params:
        s = The string to center
        width = Width of the field to center `s` in
        fillChar = The character to use for filling excess space in the field

    Returns:
        The resulting _center-justified string. The returned string is
        GC-allocated. To avoid GC allocation, use $(LREF centerJustifier)
        instead.
 +/
S center(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;
    // Materialise the lazy justifier into a freshly allocated array
    return array(centerJustifier(s, width, fillChar));
}
3970
///
@safe pure unittest
{
    assert("hello".center(7, 'X') == "XhelloX");
    // A field narrower than the text never truncates it
    assert("hello".center(2, 'X') == "hello");
    assert("hello".center(9, 'X') == "XXhelloXX");
}
3978
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            S s = to!S("hello");

            // Field narrower than the text: returned unchanged
            assert(leftJustify(s, 2) == "hello");
            assert(rightJustify(s, 2) == "hello");
            assert(center(s, 2) == "hello");

            // Width 7: two fill characters in total
            assert(leftJustify(s, 7) == "hello  ");
            assert(rightJustify(s, 7) == "  hello");
            assert(center(s, 7) == " hello ");

            // Width 8: three fill characters; when centering, the odd one
            // goes to the right
            assert(leftJustify(s, 8) == "hello   ");
            assert(rightJustify(s, 8) == "   hello");
            assert(center(s, 8) == " hello  ");

            assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
            assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
            assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");

            assert(leftJustify(s, 8, 'ö') == "helloööö");
            assert(rightJustify(s, 8, 'ö') == "öööhello");
            assert(center(s, 8, 'ö') == "öhelloöö");
        }
    });
}
4013
/++
    Center justify $(D r) in a field $(D width) characters wide. $(D fillChar)
    is the character that will be used to fill up the space in the field that
    $(D r) doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        width = minimum field width
        fillChar = used to pad both sides up to $(D width) characters

    Returns:
        a lazy range of the center justified result

    See_Also:
        $(LREF leftJustifier)
        $(LREF rightJustifier)
 +/

auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias Unit = Unqual!(ElementEncodingType!Range);

    static if (Unit.sizeof == 4)
    {
        import std.range : chain, repeat;
        import std.range.primitives : walkLength;

        // Count at most `width` elements; a longer input needs no padding
        auto counted = walkLength(r.save, width);
        if (counted > width)
            counted = width;

        // Split the slack; an odd leftover character goes to the right
        const slack = width - counted;
        const before = slack / 2;
        const after = slack - before;
        return chain(repeat(fillChar, before), r, repeat(fillChar, after));
    }
    else static if (Unit.sizeof == 2)
    {
        // UTF-16 input: justify as code points, then re-encode
        import std.utf : byDchar, byWchar;
        return centerJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else static if (Unit.sizeof == 1)
    {
        // UTF-8 input: justify as code points, then re-encode
        import std.utf : byDchar, byChar;
        return centerJustifier(r.byDchar, width, fillChar).byChar;
    }
    else
        static assert(0);
}
4064
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;
    // Text already wider than the field is passed through untouched
    assert(centerJustifier("hello", 2).equal("hello".byChar));
    // Width 8 leaves three fill characters: one on the left, two on the right
    assert(centerJustifier("hello", 8).equal(" hello  ".byChar));
    assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar));
}
4075
auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return centerJustifier!Str(r, width, fillChar);
}
4081
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!centerJustifier("hello", 8);
    assert(ok);
}
4086
@system unittest
{
    // Minimal forward range over a dstring, with no length and no slicing
    static auto byFwdRange(dstring s)
    {
        static struct FRange
        {
            dstring str;
            this(dstring s) { str = s; }
            @property bool empty() { return str.length == 0; }
            @property dchar front() { return str[0]; }
            void popFront() { str = str[1 .. $]; }
            @property FRange save() { return this; }
        }
        return FRange(s);
    }

    // A saved copy must not advance together with the range it came from
    auto live = centerJustifier(byFwdRange("hello"d), 6);
    live.popFront();
    auto snapshot = live.save;
    live.popFront();
    assert(live.front == 'l');
    assert(snapshot.front == 'e');

    // One pad character on the left of a narrow string
    auto narrow = "hello".centerJustifier(7);
    narrow.popFront();
    assert(narrow.front == 'h');
    narrow.popFront();
    assert(narrow.front == 'e');

    // Walking all six elements without reading front must work
    auto drained = byFwdRange("hello"d).centerJustifier(6);
    foreach (_; 0 .. 6)
        drained.popFront();
}
4124
4125
/++
    Replace each tab character in $(D s) with the number of spaces necessary
    to align the following character at the next tab stop.

    Params:
        s = string
        tabSize = distance between tab stops

    Returns:
        GC allocated string with tabs replaced with spaces
 +/
auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
    || __traits(compiles, StringTypeOf!Range))
{
    import std.array : array;
    // Materialise the lazy detabber into a freshly allocated array
    return array(detabber(s, tabSize));
}
4144
///
@system pure unittest
{
    // The newline resets the column, so the tab expands to a full 9 spaces
    assert(detab(" \n\tx", 9) == " \n         x");
}
4150
@safe pure unittest
{
    // Wrapper that converts to string through alias this
    static struct TestStruct
    {
        string s;
        alias s this;
    }

    // Same, but not copyable
    static struct TestStruct2
    {
        string s;
        alias s this;
        @disable this(this);
    }

    string s = " \n\tx";
    // The newline resets the column, so the tab expands to a full 9 spaces
    string cmp = " \n         x";
    auto t = TestStruct(s);
    assert(detab(t, 9) == cmp);
    assert(detab(TestStruct(s), 9) == cmp);
    assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9));
    assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9));
    assert(detab(TestStruct2(s), 9) == cmp);
}
4175
/++
    Replace each tab character in $(D r) with the number of spaces
    necessary to align the following character at the next tab stop.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with tabs replaced with spaces
 +/
auto detabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);

    alias C = Unqual!(ElementEncodingType!(Range));

    static struct Result
    {
    private:
        Range _src;
        size_t _stop;       // distance between tab stops
        size_t _owed;       // spaces still to emit for the tab being expanded
        int _col;           // current output column, reset at line breaks
        size_t _units;      // code units of the current character not yet
                            // popped; 0 means it has not been examined yet

    public:

        this(Range input, size_t tabSize)
        {
            _src = input;
            _stop = tabSize;
        }

        static if (isInfinite!(Range))
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty()
            {
                return _src.empty && _owed == 0;
            }
        }

        @property C front()
        {
            // In the middle of a tab expansion
            if (_owed)
                return ' ';

            // Read one code unit without auto-decoding strings
            static if (isSomeString!(Range))
                C unit = _src[0];
            else
                C unit = _src.front;

            // Current character was already examined: just hand out the unit
            if (_units)
                return unit;

            dchar decoded;
            if (unit < codeUnitLimit!(immutable(C)[]))
            {
                // Single-unit character
                decoded = unit;
                _units = 1;
            }
            else
            {
                // Decode on a copy; _units receives the encoded length
                auto probe = _src.save;
                decoded = decodeFront(probe, _units);
            }

            if (decoded == '\t')
            {
                // Spaces needed to reach the next tab stop
                _owed = _stop - (_col % _stop);
                _col += _owed;
                unit = ' ';
            }
            else if (decoded == '\r' || decoded == '\n' || decoded == paraSep ||
                     decoded == lineSep || decoded == nelSep)
            {
                _col = 0;
            }
            else
            {
                ++_col;
            }
            return unit;
        }

        void popFront()
        {
            // Make sure the current character has been examined, so that a
            // tab has set up its space count
            if (!_units)
                front;
            if (_owed)
                --_owed;
            if (!_owed)
            {
                static if (isSomeString!(Range))
                    _src = _src[1 .. $];
                else
                    _src.popFront();
                --_units;
            }
        }

        @property typeof(this) save()
        {
            auto copy = this;
            copy._src = _src.save;
            return copy;
        }
    }

    return Result(r, tabSize);
}
4297
///
@system pure unittest
{
    import std.array : array;

    // The newline resets the column, so the tab expands to a full 9 spaces
    assert(detabber(" \n\tx", 9).array == " \n         x");
}
4305
auto detabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated explicitly with the
    // string type that Range converts to.
    alias Str = StringTypeOf!Range;
    return detabber!Str(r, tabSize);
}
4311
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!detabber( " ab\t asdf ", 8);
    assert(ok);
}
4316
@system pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            // Tabs at columns 5, 10 and 24 expand to 3, 6 and 8 spaces
            S s = to!S("This \tis\t a fofof\tof list");
            assert(cmp(detab(s), "This    is       a fofof        of list") == 0);

            assert(detab(cast(S) null) is null);
            assert(detab("").empty);
            assert(detab("a") == "a");
            // A tab at column 0 expands to a full tab stop
            assert(detab("\t") == "        ");
            assert(detab("\t", 3) == "   ");
            assert(detab("\t", 9) == "         ");
            // Tab at column 3 with the default tab size of 8: five spaces
            assert(detab( " ab\t asdf ") == " ab      asdf ");
            // A multi-unit code point still counts as a single column
            assert(detab( " \U00010000b\tasdf ") == " \U00010000b     asdf ");
            // Every kind of line break resets the column to 0
            assert(detab("\r\t", 9) == "\r         ");
            assert(detab("\n\t", 9) == "\n         ");
            assert(detab("\u0085\t", 9) == "\u0085         ");
            assert(detab("\u2028\t", 9) == "\u2028         ");
            assert(detab(" \u2029\t", 9) == " \u2029         ");
        }
    });
}
4346
///
@system pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar;

    // The paragraph separator resets the column, so the tab expands to 9 spaces
    assert(detabber(" \u2029\t".byChar, 9).array == " \u2029         ");

    // A saved copy must not advance together with the range it came from
    auto r = "hel\tx".byWchar.detabber();
    assert(r.front == 'h');
    auto s = r.save;
    r.popFront();
    r.popFront();
    assert(r.front == 'l');
    assert(s.front == 'h');
}
4362
/++
    Replaces spaces in $(D s) with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        s       = String to convert.
        tabSize = Tab columns are $(D tabSize) spaces apart.

    Returns:
        GC allocated string with spaces replaced with tabs;
        use $(LREF entabber) to not allocate.

    See_Also:
        $(LREF entabber)
 +/
auto entab(Range)(Range s, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    import std.array : array;
    // Materialise the lazy entabber into a freshly allocated array
    return array(entabber(s, tabSize));
}
4384
///
@safe pure unittest
{
    // Eight leading spaces reach the first default tab stop and become one
    // tab; the space before the newline is trailing and is dropped
    assert(entab("        x \n") == "\tx\n");
}
4390
auto entab(Range)(auto ref Range s, size_t tabSize = 8)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) &&
    is(StringTypeOf!Range))
{
    // Not a character range itself, but convertible to a string: forward to
    // the range overload, instantiated explicitly with that string type.
    alias Str = StringTypeOf!Range;
    return entab!Str(s, tabSize);
}
4397
@safe pure unittest
{
    // testAliasedString is defined elsewhere in this module; presumably it
    // checks that string-convertible wrappers behave like plain strings.
    const ok = testAliasedString!entab(" x \n");
    assert(ok);
}
4402
/++
    Lazily replaces spaces in range $(D r) with the optimal number of tabs.
    All spaces and tabs at the end of a line are removed.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        tabSize = distance between tab stops

    Returns:
        lazy forward range with spaces replaced with tabs

    See_Also:
        $(LREF entab)
  +/
auto entabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);
    alias C = Unqual!(ElementEncodingType!Range);

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t _pendingSpaces;  // spaces still to be emitted before input resumes
        size_t _pendingTabs;    // tabs still to be emitted (they precede the spaces)
        int _column;            // column counter for the current line
        size_t _unitsLeft;      // code units of the current character not yet popped;
                                // 0 means the head of _input has not been examined

        // Code unit at the head of the input; strings are indexed to avoid autodecoding.
        @property C getFront()
        {
            static if (isSomeString!Range)
                return _input[0];
            else
                return _input.front;
        }

        // Removes one code unit from the head of the input.
        void dropUnit()
        {
            static if (isSomeString!Range)
                _input = _input[1 .. $];
            else
                _input.popFront();
        }

        // True for the characters that reset the column counter.
        static bool startsNewLine(dchar ch)
        {
            return ch == '\r' || ch == '\n' ||
                   ch == paraSep || ch == lineSep || ch == nelSep;
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        @property bool empty()
        {
            // Queued tabs/spaces still have to be delivered.
            if (_pendingTabs || _pendingSpaces)
                return false;

            // Trailing blanks are never emitted, so the range is empty
            // unless something other than a space or tab is still ahead.
            static if (isSomeString!Range)
            {
                foreach (unit; _input)
                {
                    if (unit != ' ' && unit != '\t')
                        return false;
                }
                return true;
            }
            else
            {
                if (_input.empty)
                    return true;
                immutable head = _input.front;
                if (head != ' ' && head != '\t')
                    return false;
                auto rest = _input.save;
                rest.popFront();
                foreach (unit; rest)
                {
                    if (unit != ' ' && unit != '\t')
                        return false;
                }
                return true;
            }
        }

        @property C front()
        {
            if (_pendingTabs)
                return '\t';
            if (_pendingSpaces)
                return ' ';

            C unit = getFront;
            if (_unitsLeft)
                return unit;    // character already examined; just hand out its units

            dchar decoded;
            if (unit < codeUnitLimit!(immutable(C)[]))
            {
                _unitsLeft = 1;
                decoded = unit;
                if (unit == ' ' || unit == '\t')
                {
                    // Swallow the whole run of blanks, tracking the column it ends on.
                    immutable startCol = _column;
                    C next;
                    while (true)
                    {
                        static if (isSomeString!Range)
                            assert(_input.length);
                        else
                            assert(!_input.empty);
                        next = getFront;
                        if (next == ' ')
                            ++_column;
                        else if (next == '\t')
                            _column += _tabSize - (_column % _tabSize);
                        else
                            break;
                        dropUnit();
                    }

                    // Work out how many tabs and spaces span startCol .. _column.
                    immutable width = _column - startCol;
                    if (width == 1)
                    {
                        _pendingSpaces = 1;
                    }
                    else
                    {
                        _pendingTabs = _column / _tabSize - startCol / _tabSize;
                        if (_pendingTabs == 0)
                            _pendingSpaces = width;
                        else
                            _pendingSpaces = _column % _tabSize;
                    }

                    // Examine the character that ended the run of blanks.
                    if (next < codeUnitLimit!(immutable(C)[]))
                    {
                        decoded = next;
                        _unitsLeft = 1;
                    }
                    else
                    {
                        auto lookahead = _input.save;
                        decoded = decodeFront(lookahead, _unitsLeft);
                    }

                    if (startsNewLine(decoded))
                    {
                        // Blanks directly before a line break are discarded.
                        _column = 0;
                        _pendingTabs = 0;
                        _pendingSpaces = 0;
                        return next;
                    }
                    ++_column;
                    return _pendingTabs ? '\t' : ' ';
                }
            }
            else
            {
                // Multi-unit character: decode a saved copy to learn its length.
                auto lookahead = _input.save;
                decoded = decodeFront(lookahead, _unitsLeft);
            }

            if (startsNewLine(decoded))
                _column = 0;
            else
                ++_column;
            return unit;
        }

        void popFront()
        {
            // Make sure the head character has been examined (front does the bookkeeping).
            if (!_unitsLeft)
                front;

            if (_pendingTabs)
                --_pendingTabs;
            else if (_pendingSpaces)
                --_pendingSpaces;
            else
            {
                dropUnit();
                --_unitsLeft;
            }
        }

        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }

    return Result(r, tabSize);
}
4635
///
@safe pure unittest
{
    import std.array : array;

    // Eight leading spaces reach the first tab stop (tabSize defaults to 8)
    // and become one tab; the blank before the newline is dropped.
    assert(entabber("        x \n").array == "\tx\n");
}
4642
// Overload for types that satisfy isConvertibleToString: forward to the
// range-based overload instantiated with the argument's string type.
auto entabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return entabber!Str(r, tabSize);
}
4648
@safe pure unittest
{
    // Run entabber through the testAliasedString helper with an explicit tab size.
    auto aliasedAgrees = testAliasedString!entabber(" ab asdf ", 8);
    assert(aliasedAgrees);
}
4653
@safe pure
unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Degenerate inputs.
        assert(entab(cast(string) null) is null);
        assert(entab("").empty);
        assert(entab("a") == "a");
        assert(entab("        ") == "");

        // Blank runs that do / do not cross a tab stop (default tabSize is 8).
        assert(entab("        x") == "\tx");
        assert(entab("  ab    asdf ") == "  ab\tasdf");
        assert(entab("  ab     asdf ") == "  ab\t asdf");
        assert(entab("  ab \t   asdf ") == "  ab\t   asdf");

        // "1234567" ends in column 7. One to eight spaces followed by a tab
        // end in column 16 (two tabs); nine spaces push the tab to column 24.
        assert(entab("1234567 \ta") == "1234567\t\ta");
        assert(entab("1234567  \ta") == "1234567\t\ta");
        assert(entab("1234567   \ta") == "1234567\t\ta");
        assert(entab("1234567    \ta") == "1234567\t\ta");
        assert(entab("1234567     \ta") == "1234567\t\ta");
        assert(entab("1234567      \ta") == "1234567\t\ta");
        assert(entab("1234567       \ta") == "1234567\t\ta");
        assert(entab("1234567        \ta") == "1234567\t\ta");
        assert(entab("1234567         \ta") == "1234567\t\t\ta");

        // Trailing blanks are removed; other whitespace is passed through.
        assert(entab("a ") == "a");
        assert(entab("a\v") == "a\v");
        assert(entab("a\f") == "a\f");
        assert(entab("a\n") == "a\n");
        assert(entab("a\n\r") == "a\n\r");
        assert(entab("a\r\n") == "a\r\n");
        assert(entab("a\u2028") == "a\u2028");
        assert(entab("a\u2029") == "a\u2029");
        assert(entab("a\u0085") == "a\u0085");
        assert(entab("a   ") == "a");
        assert(entab("a\t") == "a");

        // Each multi-code-unit character advances the column by one.
        assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
               "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");

        // Blanks directly before any kind of line break are dropped.
        assert(entab(" \naa") == "\naa");
        assert(entab(" \r aa") == "\r aa");
        assert(entab(" \u2028 aa") == "\u2028 aa");
        assert(entab(" \u2029 aa") == "\u2029 aa");
        assert(entab(" \u0085 aa") == "\u0085 aa");
    });
}
4700
@safe pure
unittest
{
    import std.array : array;
    import std.utf : byChar;

    // Code-unit ranges containing multi-unit line separators.
    auto afterNel = entabber(" \u0085 aa".byChar).array;
    assert(afterNel == "\u0085 aa");
    auto afterLineSep = entabber(" \u2028\t aa \t".byChar).array;
    assert(afterLineSep == "\u2028\t aa");

    // A saved copy must not advance together with the range it was saved from.
    auto walker = entabber("1234", 4);
    walker.popFront();
    auto snapshot = walker.save;
    walker.popFront();
    assert(walker.front == '3');
    assert(snapshot.front == '2');
}
4716
4717
/++
    Returns a copy of $(D str) in which every character that is a key of
    $(D transTable) has been replaced by the corresponding value.
    $(D transTable) is an AA whose keys are $(D dchar) and whose values are
    either $(D dchar) or some type of string. If $(D toRemove) is given, the
    characters in it are removed from $(D str) prior to translation.
    $(D str) itself is unaltered.

    See_Also:
        $(LREF tr)
        $(REF replace, std,array)

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
  +/
C1[] translate(C1, C2 = immutable char)(C1[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated characters in an appender and return its contents.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
4746
///
@safe pure unittest
{
    dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charMap) == "h5ll7 w7rld");

    // Characters listed in the third argument are dropped.
    assert(translate("hello world", charMap, "low") == "h5 rd");

    // Values may also be strings.
    string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringMap) == "h5llorange worangerld");
}
4758
@safe pure unittest // issue 13018
{
    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charMap) == "h5ll7 w7rld");

    assert(translate("hello world", charMap, "low") == "h5 rd");

    immutable string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringMap) == "h5llorange worangerld");
}
4769
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Every mutability variant of every character width.
    alias Strings = AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]);

    assertCTFEable!(
    {
        foreach (Str; Strings)
        {
            // Translation only, with and without non-BMP characters.
            assert(translate(to!Str("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
                   to!Str("qe55o wor5d"));
            assert(translate(to!Str("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
                   to!Str("he\U00010143\U00010143l wlr\U00010143d"));
            assert(translate(to!Str("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
                   to!Str("qe55o \U00010143 wor5d"));
            assert(translate(to!Str("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
                   to!Str("hell0 o w0rld"));
            assert(translate(to!Str("hello world"), cast(dchar[dchar]) null) == to!Str("hello world"));

            // Translation combined with removal, for every removal-string type
            // and every qualifier of the table.
            foreach (Rem; Strings)
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
                foreach (Table; AliasSeq!(dchar[dchar], const dchar[dchar],
                                          immutable dchar[dchar]))
                {
                    Table table = ['h' : 'q', 'l' : '5'];
                    assert(translate(to!Str("hello world"), table, to!Rem("r"))
                           == to!Str("qe55o wo5d"));
                    assert(translate(to!Str("hello world"), table, to!Rem("helo"))
                           == to!Str(" wrd"));
                    assert(translate(to!Str("hello world"), table, to!Rem("q5"))
                           == to!Str("qe55o wor5d"));
                }
            }();

            // The result has the same type as the input.
            auto input = to!Str("hello world");
            dchar[dchar] sampleTable = ['h' : 'q', 'l' : '5'];
            static assert(is(typeof(input) == typeof(translate(input, sampleTable))));
        }
    });
}
4814
/++ Ditto +/
C1[] translate(C1, S, C2 = immutable char)(C1[] str,
                                           in S[dchar] transTable,
                                           const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
{
    import std.array : appender;

    // Same as the dchar-valued overload; here each key maps to a whole string.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
4826
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        {
            assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
                   to!S("yellowe4242o wor42d"));
            assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
                   to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
            assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
                   to!S("yellowe4242o \U00010143 wor42d"));
            assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
                   to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
            assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
                   to!S("ello \U00010143 world"));
            // Mapping the middle character to "" deletes it but keeps BOTH
            // surrounding spaces, hence the double space in the expected value.
            assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
                   to!S("hello  world"));
            assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world"));

            foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                                  wchar[], const(wchar)[], immutable(wchar)[],
                                  dchar[], const(dchar)[], immutable(dchar)[]))
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396

                foreach (R; AliasSeq!(string[dchar], const string[dchar],
                                      immutable string[dchar]))
                {
                    R tt = ['h' : "yellow", 'l' : "42"];
                    assert(translate(to!S("hello world"), tt, to!T("r")) ==
                           to!S("yellowe4242o wo42d"));
                    assert(translate(to!S("hello world"), tt, to!T("helo")) ==
                           to!S(" wrd"));
                    assert(translate(to!S("hello world"), tt, to!T("y42")) ==
                           to!S("yellowe4242o wor42d"));
                    assert(translate(to!S("hello world"), tt, to!T("hello world")) ==
                           to!S(""));
                    assert(translate(to!S("hello world"), tt, to!T("42")) ==
                           to!S("yellowe4242o wor42d"));
                }
            }();

            // The result has the same type as the input.
            auto s = to!S("hello world");
            string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
            static assert(is(typeof(s) == typeof(translate(s, transTable))));
        }
    });
}
4880
/++
    Overload of $(D translate) that writes the result to a caller-supplied
    output range instead of returning a new string.

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C1, C2 = immutable char, Buffer)(C1[] str,
                                                in dchar[dchar] transTable,
                                                const(C2)[] toRemove,
                                                Buffer buffer)
if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
{
    // All the work is shared with the other AA-based overloads.
    translateImpl(str, transTable, toRemove, buffer);
}
4899
///
@safe pure unittest
{
    import std.array : appender;

    dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    auto sink = appender!(dchar[])();

    translate("hello world", charMap, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Reuse the same buffer for the next call.
    sink.clear();
    translate("hello world", charMap, "low", sink);
    assert(sink.data == "h5 rd");

    sink.clear();
    string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringMap, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
4918
@safe pure unittest // issue 13018
{
    import std.array : appender;

    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charMap = ['e' : '5', 'o' : '7', '5': 'q'];
    auto sink = appender!(dchar[])();

    translate("hello world", charMap, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    sink.clear();
    translate("hello world", charMap, "low", sink);
    assert(sink.data == "h5 rd");

    sink.clear();
    immutable string[dchar] stringMap = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringMap, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
4936
/++ Ditto +/
void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
                                                   in S[dchar] transTable,
                                                   const(C2)[] toRemove,
                                                   Buffer buffer)
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
{
    // String-valued table: each matching character is replaced by a whole string.
    translateImpl(str, transTable, toRemove, buffer);
}
4946
// Shared implementation of the AA-based translate overloads.
// Walks str code point by code point: characters listed in toRemove are
// skipped, characters that are keys of transTable are written to buffer as
// their mapped value, and everything else is written unchanged.
private void translateImpl(C1, T, C2, Buffer)(C1[] str,
                                              T transTable,
                                              const(C2)[] toRemove,
                                              Buffer buffer)
{
    // Set of code points to drop, built once up front.
    bool[dchar] dropped;
    foreach (dchar ch; toRemove)
        dropped[ch] = true;

    foreach (dchar ch; str)
    {
        if (ch in dropped)
            continue;

        if (auto replacement = ch in transTable)
            put(buffer, *replacement);
        else
            put(buffer, ch);
    }
}
4970
/++
    This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
    will $(I not) work with Unicode. It exists as an optimization for the
    cases where Unicode processing is not necessary.

    Instead of an AA, this overload takes a translation table as produced by
    $(LREF makeTransTable): an array of $(D 256) elements in which the index
    is the character being replaced and the value is the character that
    replaces it.

    No decoding takes place; the string is processed one code unit at a
    time. It is therefore possible to pass Extended ASCII characters (ASCII
    itself only uses $(D 128) characters), but be warned that Extended ASCII
    characters are not valid Unicode and will result in a $(D UTFException)
    being thrown from most other Phobos functions.

    For the same reason this overload can translate the ASCII characters of
    a proper UTF-8 string without altering its non-ASCII characters. UTF
    validation issues only arise when a code unit greater than $(D 127) is
    replaced by another code unit, or when any code unit is replaced by a
    code unit greater than $(D 127).

    See_Also:
        $(LREF tr)
        $(REF replace, std,array)

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
  +/
C[] translate(C = immutable char)(in char[] str, in char[] transTable, in char[] toRemove = null) @trusted pure nothrow
if (is(Unqual!C == char))
in
{
    assert(transTable.length == 256);
}
body
{
    // Lookup table of the code units to drop.
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // First pass: size the result exactly.
    size_t kept = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        ++kept;
    }

    // Second pass: translate every surviving code unit.
    auto translated = new char[kept];
    size_t next = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        translated[next] = transTable[unit];
        ++next;
    }

    return cast(C[])(translated);
}
5035
5036
/**
 * Same as $(LREF makeTransTable), except that the translation table is
 * returned as a string allocated on the GC heap.
 *
 * Use $(LREF makeTransTable) instead.
 */
string makeTrans(in char[] from, in char[] to) @trusted pure nothrow
{
    // Build the fixed-size table, then copy it into an immutable GC string.
    char[256] table = makeTransTable(from, to);
    return table[].idup;
}
5047
///
@safe pure nothrow unittest
{
    auto table = makeTrans("eo5", "57q");
    assert(translate("hello world", table) == "h5ll7 w7rld");

    // Characters in the third argument are removed.
    assert(translate("hello world", table, "low") == "h5 rd");
}
5056
/*******
 * Builds a 256 character translation table that maps every character to
 * itself, except that each character in from[] is mapped to the character
 * at the same position in to[].
 *
 * Params:
 *      from = array of chars, less than or equal to 256 in length
 *      to = corresponding array of chars to translate to
 * Returns:
 *      translation array
 */

char[256] makeTransTable(in char[] from, in char[] to) @safe pure nothrow @nogc
in
{
    import std.ascii : isASCII;
    assert(from.length == to.length);
    assert(from.length <= 256);
    foreach (char c; from)
        assert(isASCII(c));
    foreach (char c; to)
        assert(isASCII(c));
}
body
{
    // Every slot is written below, so default initialization is skipped.
    char[256] table = void;

    // Start from the identity mapping ...
    for (size_t code = 0; code < table.length; ++code)
        table[code] = cast(char) code;

    // ... then overwrite the entries named in from[].
    foreach (pos, source; from)
        table[source] = to[pos];

    return table;
}
5089
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // The three mutability variants of a char string.
    alias Strings = AliasSeq!(char[], const(char)[], immutable(char)[]);

    assertCTFEable!(
    {
        // Explicitly chosen result element type.
        foreach (Ch; AliasSeq!(char, const char, immutable char))
        {
            assert(translate!Ch("hello world", makeTransTable("hl", "q5")) == to!(Ch[])("qe55o wor5d"));

            auto input = to!(Ch[])("hello world");
            auto table = makeTransTable("hl", "q5");
            static assert(is(typeof(input) == typeof(translate!Ch(input, table))));
        }

        foreach (Str; Strings)
        {
            // Translation only; non-ASCII code units pass through untouched.
            assert(translate(to!Str("hello world"), makeTransTable("hl", "q5")) == to!Str("qe55o wor5d"));
            assert(translate(to!Str("hello \U00010143 world"), makeTransTable("hl", "q5")) ==
                   to!Str("qe55o \U00010143 wor5d"));
            assert(translate(to!Str("hello world"), makeTransTable("ol", "1o")) == to!Str("heoo1 w1rod"));
            assert(translate(to!Str("hello world"), makeTransTable("", "")) == to!Str("hello world"));
            assert(translate(to!Str("hello world"), makeTransTable("12345", "67890")) == to!Str("hello world"));
            assert(translate(to!Str("hello \U00010143 world"), makeTransTable("12345", "67890")) ==
                   to!Str("hello \U00010143 world"));

            // Translation combined with removal.
            foreach (Rem; Strings)
            (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
                assert(translate(to!Str("hello world"), makeTransTable("hl", "q5"), to!Rem("r")) ==
                       to!Str("qe55o wo5d"));
                assert(translate(to!Str("hello \U00010143 world"), makeTransTable("hl", "q5"), to!Rem("r")) ==
                       to!Str("qe55o \U00010143 wo5d"));
                assert(translate(to!Str("hello world"), makeTransTable("hl", "q5"), to!Rem("helo")) ==
                       to!Str(" wrd"));
                assert(translate(to!Str("hello world"), makeTransTable("hl", "q5"), to!Rem("q5")) ==
                       to!Str("qe55o wor5d"));
            }();
        }
    });
}
5131
/++
    This is an $(I $(RED ASCII-only)) overload of $(D translate) which writes
    the result to an existing buffer instead of allocating a new string.

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C = immutable char, Buffer)(in char[] str, in char[] transTable,
        in char[] toRemove, Buffer buffer) @trusted pure
if (is(Unqual!C == char) && isOutputRange!(Buffer, char))
in
{
    assert(transTable.length == 256);
}
body
{
    // Lookup table of the code units to drop.
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // Emit the translation of every code unit that is not dropped.
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        put(buffer, transTable[unit]);
    }
}
5162
///
@safe pure unittest
{
    import std.array : appender;

    auto sink = appender!(char[])();
    auto table = makeTransTable("eo5", "57q");

    translate("hello world", table, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Reuse the same buffer, this time also removing characters.
    sink.clear();
    translate("hello world", table, "low", sink);
    assert(sink.data == "h5 rd");
}
5176
//@@@DEPRECATED_2.086@@@
// Tests whether c matches pattern. A '^' in first position negates the
// match; "a-z" style ranges are recognised when the '-' is neither the first
// pattern character (after an optional '^') nor the last one.
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
if (isSomeString!S)
{
    bool negated = false;   // pattern started with '^'
    bool inRange = false;   // previous character was the '-' of a range
    dchar previous;         // last pattern character seen (the lower end of a range)

    foreach (size_t pos, dchar item; pattern)
    {
        if (item == '^' && pos == 0)
        {
            negated = true;
            if (pos + 1 == pattern.length)
                return (c == item); // or should this be an error?
        }
        else if (inRange)
        {
            // item is the upper end of the range previous-item
            inRange = false;
            if ((previous <= c && c <= item) || c == item)
                return !negated;
        }
        else if (item == '-' && pos > (negated ? 1 : 0) && pos + 1 < pattern.length)
        {
            // keep `previous` pointing at the lower end of the range
            inRange = true;
            continue;
        }
        else if (c == item)
        {
            return !negated;
        }
        previous = item;
    }
    return negated;
}
5211
5212
deprecated
@safe pure @nogc unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Plain membership.
        assert( inPattern('x', "x"));
        assert(!inPattern('x', "y"));
        assert(!inPattern('x', string.init));
        assert( inPattern('x', "yxxy"));

        // A leading '^' negates the pattern.
        assert( inPattern('x', "^y"));
        assert(!inPattern('x', "^yxxy"));
        assert( inPattern('x', "^abcd"));
        assert(!inPattern('^', "^^"));
        assert( inPattern('^', "^"));
        assert( inPattern('^', "a^"));

        // Ranges, including a trailing '-' (taken literally) and a reversed range.
        assert( inPattern('x', "a-z"));
        assert(!inPattern('x', "A-Z"));
        assert(!inPattern('x', "^a-z"));
        assert( inPattern('x', "^A-Z"));
        assert( inPattern('-', "a-"));
        assert(!inPattern('-', "^A-"));
        assert( inPattern('a', "z-a"));
        assert( inPattern('z', "z-a"));
        assert(!inPattern('x', "z-a"));
    });
}
5242
//@@@DEPRECATED_2.086@@@
// Returns true when c matches every pattern in patterns (and therefore
// also when patterns is empty).
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
if (isSomeString!S)
{
    // Let the loop variable take the element type S. Forcing it to `string`
    // made the template fail to instantiate for every other string type the
    // constraint accepts (char[], const(char)[], wstring, dstring, ...).
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
            return false;
    }
    return true;
}
5257
//@@@DEPRECATED_2.086@@@
// Counts the characters of s that match pattern (see inPattern).
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
if (isSomeString!S && isSomeString!S1)
{
    size_t matches = 0;
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++matches;
    }
    return matches;
}
5270
deprecated
@safe pure @nogc unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Range pattern: all three characters match.
        assert(countchars("abc", "a-c") == 3);
        // Plain set: two 'o's and one 'r'.
        assert(countchars("hello world", "or") == 3);
    });
}
5283
//@@@DEPRECATED_2.086@@@
// Returns s with every character that matches pattern (see inPattern)
// removed. When nothing matches, s itself is returned without copying.
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
S removechars(S)(S s, in S pattern) @safe pure
if (isSomeString!S)
{
    import std.utf : encode;

    alias Unit = Unqual!(typeof(s[0]));
    Unit[] kept;                // built lazily, only once something is removed
    bool anyRemoved = false;

    foreach (size_t pos, dchar ch; s)
    {
        if (!inPattern(ch, pattern))
        {
            // Surviving characters only need copying after the first removal.
            if (anyRemoved)
                encode(kept, ch);
            continue;
        }

        if (!anyRemoved)
        {
            // First removal: start the copy with everything seen so far.
            anyRemoved = true;
            kept = s[0 .. pos].dup;
        }
    }

    if (!anyRemoved)
        return s;
    return kept;
}
5315
deprecated
@safe pure unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Everything removed.
        assert(removechars("abc", "a-c").length == 0);
        // Removal in the middle, at the end, and at both ends.
        assert(removechars("hello world", "or") == "hell wld");
        assert(removechars("hello world", "d") == "hello worl");
        assert(removechars("hah", "h") == "a");
    });
}
5330
deprecated
@safe pure unittest
{
    // Nothing matches the pattern, so the text comes back unchanged.
    auto untouched = removechars("abc", "x");
    assert(untouched == "abc");
}
5336
//@@@DEPRECATED_2.086@@@
// Collapses each run of identical adjacent characters of s into one
// character. When pattern is non-null only characters matching it (see
// inPattern) are collapsed. Returns s itself when nothing was collapsed, and
// a slice of s when only a prefix of s has to be kept.
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
S squeeze(S)(S s, in S pattern = null)
{
    import std.utf : encode, stride;

    Unqual!(typeof(s[0]))[] copy;   // allocated on the first write after a collapse
    dchar runChar;                  // character of the current run
    size_t keepEnd;                 // s[0 .. keepEnd] is known to need no change
    bool inRun;                     // previous character was squeezable
    bool collapsed;                 // at least one character has been dropped

    foreach (size_t pos, dchar ch; s)
    {
        if (inRun && runChar == ch)
        {
            // Duplicate inside a run: drop it.
            collapsed = true;
            continue;
        }

        immutable squeezable = pattern is null || inPattern(ch, pattern);
        inRun = squeezable;

        if (collapsed)
        {
            // Something was dropped earlier, so output now goes to the copy.
            if (copy is null)
                copy = s[0 .. keepEnd].dup;
            encode(copy, ch);
        }
        else if (squeezable)
        {
            keepEnd = pos + stride(s, pos);
        }

        if (squeezable)
            runChar = ch;
    }

    if (!collapsed)
        return s;
    if (copy is null)
        return s[0 .. keepEnd];
    return cast(S) copy;
}
5381
deprecated
@system pure unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        string s;

        assert(squeeze("hello") == "helo");

        // Nothing to squeeze: the very same string comes back.
        s = "abcd";
        assert(squeeze(s) is s);
        s = "xyzz";
        assert(squeeze(s).ptr == s.ptr); // should just be a slice

        // Only characters named in the pattern are squeezed.
        assert(squeeze("hello goodbyee", "oe") == "hello godbye");
    });
}
5402
//@@@DEPRECATED_2.086@@@
// Splits off the longest prefix of s whose characters all match pattern
// (see inPattern): the prefix is returned and s is advanced past it.
deprecated("This function is obsolete. It is available in https://github.com/dlang/undeaD if necessary.")
S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
{
    // Index of the first character that does not match; the whole string by default.
    size_t cut = s.length;
    foreach (pos, dchar ch; s)
    {
        if (inPattern(ch, pattern))
            continue;
        cut = pos;
        break;
    }

    auto eaten = s[0 .. cut];
    s = s[cut .. $];
    return eaten;
}
5419
///
deprecated
@safe pure @nogc unittest
{
    string text = "123abc";

    // The leading digits are split off ...
    string digits = munch(text, "0123456789");
    assert(digits == "123" && text == "abc");

    // ... and a second attempt finds nothing more to take.
    digits = munch(text, "0123456789");
    assert(digits == "" && text == "abc");
}
5430
deprecated
@safe pure @nogc unittest
{
    // The cut must fall on a code point boundary when non-ASCII text follows.
    string text = "123€abc";
    string taken = munch(text, "0123456789");
    assert(taken == "123" && text == "€abc");

    taken = munch(text, "0123456789");
    assert(taken == "" && text == "€abc");

    // Non-ASCII characters can be munched as well.
    taken = munch(text, "£$€¥");
    assert(taken == "€" && text == "abc");
}
5442
5443
/**********************************************
 * Returns the string that is the 'successor' to s[].
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. If that generates a carry, the process is
 * repeated with the character to its immediate left.
 */

S succ(S)(S s) @safe pure
if (isSomeString!S)
{
    import std.ascii : isAlphaNum;

    // Strings that are empty or do not end in an alphanumeric are returned as is.
    if (s.length == 0 || !isAlphaNum(s[$ - 1]))
        return s;

    auto result = s.dup;
    for (size_t pos = result.length - 1; ; --pos)
    {
        immutable dchar current = s[pos];
        dchar wrapped = current;    // value the position takes when it overflows
        dchar carry;                // character to prepend if the carry runs off the front

        if (current == '9')
        {
            wrapped = '0';
            carry = '1';
        }
        else if (current == 'z' || current == 'Z')
        {
            wrapped -= 'Z' - 'A';   // back to 'a' / 'A'
            carry = wrapped;
        }
        else
        {
            // No overflow at this position: bump it (if alphanumeric) and stop.
            if (isAlphaNum(current))
                result[pos]++;
            return result;
        }

        result[pos] = cast(char) wrapped;
        if (pos == 0)
        {
            // The carry ran off the front: grow the string by one character.
            auto grown = new typeof(result[0])[result.length + 1];
            grown[0] = cast(char) carry;
            grown[1 .. $] = result[];
            return grown;
        }
    }
}
5497
///
@safe pure unittest
{
    // Simple increment.
    assert(succ("1") == "2");
    // A carry off the front lengthens the string.
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    // Carries propagate from digits into letters.
    assert(succ("zz99") == "aaa00");
}
5506
@safe pure unittest
{
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Inputs without a trailing alphanumeric are returned unchanged.
        assert(succ(string.init) is null);
        assert(succ("!@#$%") == "!@#$%");

        // Increment and carry.
        assert(succ("1") == "2");
        assert(succ("9") == "10");
        assert(succ("999") == "1000");
        assert(succ("zz99") == "aaa00");
    });
}
5522
5523
5524 /++
Replaces the characters in $(D str) which are in $(D from) with
the corresponding characters in $(D to) and returns the resulting string.
5527
5528 $(D tr) is based on
5529 $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
5530 though it doesn't do everything that the Posix utility does.
5531
5532 Params:
5533 str = The original string.
5534 from = The characters to replace.
5535 to = The characters to replace with.
5536 modifiers = String containing modifiers.
5537
5538 Modifiers:
5539 $(BOOKTABLE,
5540 $(TR $(TD Modifier) $(TD Description))
5541 $(TR $(TD $(D 'c')) $(TD Complement the list of characters in $(D from)))
5542 $(TR $(TD $(D 'd')) $(TD Removes matching characters with no corresponding
5543 replacement in $(D to)))
5544 $(TR $(TD $(D 's')) $(TD Removes adjacent duplicates in the replaced
5545 characters))
5546 )
5547
5548 If the modifier $(D 'd') is present, then the number of characters in
5549 $(D to) may be only $(D 0) or $(D 1).
5550
5551 If the modifier $(D 'd') is $(I not) present, and $(D to) is empty, then
5552 $(D to) is taken to be the same as $(D from).
5553
5554 If the modifier $(D 'd') is $(I not) present, and $(D to) is shorter than
5555 $(D from), then $(D to) is extended by replicating the last character in
5556 $(D to).
5557
5558 Both $(D from) and $(D to) may contain ranges using the $(D '-') character
(e.g. $(D "a-d") is synonymous with $(D "abcd").) Neither accepts a leading
5560 $(D '^') as meaning the complement of the string (use the $(D 'c') modifier
5561 for that).
5562 +/
C1[] tr(C1, C2, C3, C4 = immutable char)
    (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
{
    import std.array : appender;
    import std.conv : conv_to = to;
    import std.utf : decode;

    // Flags decoded from `modifiers`; any other modifier character is a
    // programming error and trips the assert(0) below.
    bool mod_c;
    bool mod_d;
    bool mod_s;

    foreach (char c; modifiers)
    {
        switch (c)
        {
            case 'c':   mod_c = 1; break;   // complement
            case 'd':   mod_d = 1; break;   // delete unreplaced chars
            case 's':   mod_s = 1; break;   // squeeze duplicated replaced chars
            default:    assert(0);
        }
    }

    // Without 'd', an empty `to` means "same as `from`" (converted to the
    // element type of `to`).
    if (to.empty && !mod_d)
        to = conv_to!(typeof(to))(from);

    auto result = appender!(C1[])();
    bool modified;  // true when the previously emitted character was a replacement
    dchar lastc;    // previously emitted character, used by the 's' squeeze check

    foreach (dchar c; str)
    {
        dchar lastf;    // previous literal character of `from` (start of a possible range)
        dchar lastt;    // previous literal character of `to` (start of a possible range)
        dchar newc;     // replacement chosen for c
        int n = 0;      // index of c within the expanded `from` list

        // Pass 1: look for c in `from`, expanding "x-y" ranges on the fly.
        for (size_t i = 0; i < from.length; )
        {
            immutable f = decode(from, i);
            // A '-' only denotes a range when it has a character on both
            // sides; otherwise it is treated as a literal below.
            if (f == '-' && lastf != dchar.init && i < from.length)
            {
                immutable nextf = decode(from, i);
                if (lastf <= c && c <= nextf)
                {
                    // lastf itself was already counted by the n++ below,
                    // hence the -1.
                    n += c - lastf - 1;
                    if (mod_c)
                        goto Lnotfound;
                    goto Lfound;
                }
                // c is outside this range: skip the range's remaining members.
                n += nextf - lastf;
                // Reset so that a '-' directly after a range is a literal.
                lastf = lastf.init;
                continue;
            }

            if (c == f)
            {   if (mod_c)
                    goto Lnotfound;
                goto Lfound;
            }
            lastf = f;
            n++;
        }
        // c is not in `from`: copied verbatim, unless the list is
        // complemented, in which case it counts as a match.
        if (!mod_c)
            goto Lnotfound;
        n = 0;          // consider it 'found' at position 0

      Lfound:

        // Find the nth character in to[]
        dchar nextt;
        for (size_t i = 0; i < to.length; )
        {
            immutable t = decode(to, i);
            if (t == '-' && lastt != dchar.init && i < to.length)
            {
                nextt = decode(to, i);
                n -= nextt - lastt;
                if (n < 0)
                {
                    // The wanted position lies inside this range; n is
                    // negative here, so count back from the range's end.
                    newc = nextt + n + 1;
                    goto Lnewc;
                }
                lastt = dchar.init;
                continue;
            }
            if (n == 0)
            {   newc = t;
                goto Lnewc;
            }
            lastt = t;
            nextt = t;
            n--;
        }
        // `to` ran out before position n: with 'd' the character is
        // dropped, otherwise the last character seen in `to` is reused.
        if (mod_d)
            continue;
        newc = nextt;

      Lnewc:
        // 's': suppress a replacement that repeats the replacement
        // emitted immediately before it.
        if (mod_s && modified && newc == lastc)
            continue;
        result.put(newc);
        assert(newc != dchar.init);
        modified = true;
        lastc = newc;
        continue;

      Lnotfound:
        // Unmatched character: copy through and end any squeeze run.
        result.put(c);
        lastc = c;
        modified = false;
    }

    return result.data;
}
5677
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Complete list of test types; too slow to test'em all
    // alias TestTypes = AliasSeq!(
    //          char[], const( char)[], immutable( char)[],
    //         wchar[], const(wchar)[], immutable(wchar)[],
    //         dchar[], const(dchar)[], immutable(dchar)[]);

    // Reduced list of test types
    alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]);

    assertCTFEable!(
    {
    // S, T and U are the types of `str`, `from` and `to`; every combination
    // of the reduced type list is exercised.
    foreach (S; TestTypes)
    {
        foreach (T; TestTypes)
        {
            foreach (U; TestTypes)
            {
                // Plain lists and '-' ranges, including a `to` whose ranges
                // are laid out differently from those in `from`.
                assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
                assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
                assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
                // 'c' (complement), 'd' (delete) and 's' (squeeze) modifiers.
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef"));
                assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye"));
                assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
                assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
                // A character repeated in `from` maps through its first occurrence.
                assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc"));
            }
        }

        // The result has the same type as the input string.
        auto s = to!S("hello world");
        static assert(is(typeof(s) == typeof(tr(s, "he", "if"))));
    }
    });
}
5720
@system pure unittest
{
    import core.exception : AssertError;
    import std.exception : assertThrown;
    // 'X' is not one of the modifiers 'c', 'd' or 's', so tr's modifier
    // switch reaches its assert(0) default case.
    assertThrown!AssertError(tr("abcdef", "cd", "CD", "X"));
}
5727
5728 /**
5729 * Takes a string $(D s) and determines if it represents a number. This function
5730 * also takes an optional parameter, $(D bAllowSep), which will accept the
5731 * separator characters $(D ',') and $(D '__') within the string. But these
* characters should be stripped from the string before using any
* of the conversion functions such as $(D to!int()) or $(D to!float());
* otherwise an error will occur.
5735 *
5736 * Also please note, that no spaces are allowed within the string
5737 * anywhere whether it's a leading, trailing, or embedded space(s),
5738 * thus they too must be stripped from the string before using this
5739 * function, or any of the conversion functions.
5740 *
5741 * Params:
5742 * s = the string or random access range to check
5743 * bAllowSep = accept separator characters or not
5744 *
5745 * Returns:
5746 * $(D bool)
5747 */
bool isNumeric(S)(S s, bool bAllowSep = false)
if (isSomeString!S ||
    (isRandomAccessRange!S &&
    hasSlicing!S &&
    isSomeChar!(ElementType!S) &&
    !isInfinite!S))
{
    import std.algorithm.comparison : among;
    import std.ascii : isASCII;

    // ASCII-only, case-insensitive comparison of a code unit range with a
    // (lower case) string literal.
    static bool asciiCmp(S1)(S1 a, string b)
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : map;
        import std.ascii : toLower;
        import std.utf : byChar;
        return a.map!toLower.equal(b.byChar.map!toLower);
    }

    // auto-decoding special case, we're only comparing characters
    // in the ASCII range so there's no reason to decode
    static if (isSomeString!S)
    {
        import std.utf : byCodeUnit;
        auto codeUnits = s.byCodeUnit;
    }
    else
    {
        alias codeUnits = s;
    }

    if (codeUnits.empty)
        return false;

    // Check for NaN (Not a Number) and for Infinity
    if (codeUnits.among!((a, b) => asciiCmp(a.save, b))
            ("nan", "nani", "nan+nani", "inf", "-inf"))
        return true;

    // A single leading sign is allowed; it is dropped before scanning.
    immutable frontResult = codeUnits.front;
    if (frontResult == '-' || frontResult == '+')
        codeUnits.popFront;

    immutable iLen = codeUnits.length;
    // State of the number currently being scanned. For complex literals
    // everything except bComplex is reset when the '+' between the real
    // and the imaginary part is reached.
    bool bDecimalPoint, bExponent, bComplex, sawDigits;

    for (size_t i = 0; i < iLen; i++)
    {
        immutable c = codeUnits[i];

        if (!c.isASCII)
            return false;

        // Digits are good, skip to the next character
        if (c >= '0' && c <= '9')
        {
            sawDigits = true;
            continue;
        }

        // Check for the complex type, and if found
        // reset the flags for checking the 2nd number.
        if (c == '+')
        {
            if (!i)
                return false;
            bDecimalPoint = false;
            bExponent = false;
            bComplex = true;
            sawDigits = false;
            continue;
        }

        // Allow only one exponent per number
        if (c == 'e' || c == 'E')
        {
            // A 2nd exponent, an exponent without a mantissa in front of
            // it, or an exponent marker at the very end: not a number.
            if (bExponent || !sawDigits || i + 1 >= iLen)
                return false;
            // Look forward for the sign, and if
            // missing then this is not a number.
            if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+')
                return false;
            bExponent = true;
            // The exponent needs digits of its own: forget the mantissa
            // digits so that inputs such as "1e-" or "1e+f" are rejected
            // by the sawDigits checks below.
            sawDigits = false;
            i++;    // skip the exponent's sign
            continue;
        }
        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;
            bDecimalPoint = true;
            continue;
        }
        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        if (i == iLen - 2)
        {
            if (!sawDigits)
                return false;
            // "ul" is an integer-only suffix: it is valid exactly when no
            // decimal point, exponent or complex part was seen.
            if (asciiCmp(codeUnits[i .. iLen], "ul"))
                return !bDecimalPoint && !bExponent && !bComplex;
            // "fi" and "li" are accepted for integers and floats alike;
            // any other two-character tail is not a number.
            return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))
                ("fi", "li") != 0;
        }
        if (i == iLen - 1)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (c.among!('u', 'l', 'U', 'L')() &&
                   (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Check to see if the last character in the string
            // is the required 'i' character
            if (bComplex)
                return c.among!('i', 'I')() != 0;
            // Floating-Point Number
            return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0;
        }
        // Check if separators are allowed to be in the numeric string
        if (!bAllowSep || !c.among!('_', ',')())
            return false;
    }

    // No suffix: the text is numeric only if the last number scanned
    // (mantissa, exponent or imaginary part) contained at least one digit.
    return sawDigits;
}
5889
5890 /**
5891 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong)
5892 * ['+'|'-']digit(s)[U|L|UL]
5893 */
@safe @nogc pure nothrow unittest
{
    // Plain digits, optionally signed, optionally followed by U, L or UL.
    assert(isNumeric("123"));
    assert(isNumeric("123UL"));
    assert(isNumeric("123L"));
    assert(isNumeric("+123U"));
    assert(isNumeric("-123L"));
}
5902
5903 /**
5904 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal)
5905 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
5906 * or [nan|nani|inf|-inf]
5907 */
@safe @nogc pure nothrow unittest
{
    // Signed values, decimal points, signed exponents and float suffixes.
    assert(isNumeric("+123"));
    assert(isNumeric("-123.01"));
    assert(isNumeric("123.3e-10f"));
    assert(isNumeric("123.3e-10fi"));
    assert(isNumeric("123.3e-10L"));

    // The special values are recognised by name.
    assert(isNumeric("nan"));
    assert(isNumeric("nani"));
    assert(isNumeric("-inf"));
}
5920
5921 /**
5922 * Floating-Point Number: (cfloat, cdouble, and creal)
5923 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
5924 * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
5925 * or [nan|nani|nan+nani|inf|-inf]
5926 */
@safe @nogc pure nothrow unittest
{
    // Two numbers joined by '+'; the trailing 'i' suffix is optional here.
    assert(isNumeric("-123e-1+456.9e-10Li"));
    assert(isNumeric("+123e+10+456i"));
    assert(isNumeric("123+456"));
}
5933
@safe @nogc pure nothrow unittest
{
    // Suffixes without any digits in front of them.
    assert(!isNumeric("F"));
    assert(!isNumeric("L"));
    assert(!isNumeric("U"));
    assert(!isNumeric("i"));
    assert(!isNumeric("fi"));
    assert(!isNumeric("ul"));
    assert(!isNumeric("li"));
    // Punctuation, signs and exponent markers on their own.
    assert(!isNumeric("."));
    assert(!isNumeric("-"));
    assert(!isNumeric("+"));
    assert(!isNumeric("e-"));
    assert(!isNumeric("e+"));
    assert(!isNumeric(".f"));
    assert(!isNumeric("e+f"));
    // Doubled sign, empty input, repeated exponent, unsigned exponent
    // and a non-ASCII code unit.
    assert(!isNumeric("++1"));
    assert(!isNumeric(""));
    assert(!isNumeric("1E+1E+1"));
    assert(!isNumeric("1E1"));
    assert(!isNumeric("\x81"));
}
5956
5957 // Test string types
@safe unittest
{
    import std.conv : to;

    // The same inputs must give the same answers for char, wchar and dchar
    // strings, both immutable and mutable.
    foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[]))
    {
        assert("123".to!T.isNumeric());
        assert("123UL".to!T.isNumeric());
        assert("123fi".to!T.isNumeric());
        assert("123li".to!T.isNumeric());
        assert(!"--123L".to!T.isNumeric());
    }
}
5971
5972 // test ranges
@system pure unittest
{
    import std.range : refRange;
    import std.utf : byCodeUnit;

    // Non-string random access ranges of characters take the second half
    // of isNumeric's template constraint.
    assert("123".byCodeUnit.isNumeric());
    assert("123UL".byCodeUnit.isNumeric());
    assert("123fi".byCodeUnit.isNumeric());
    assert("123li".byCodeUnit.isNumeric());
    assert(!"--123L".byCodeUnit.isNumeric());

    // Ranges with reference semantics: a plain number ...
    dstring z = "0";
    assert(isNumeric(refRange(&z)));

    // ... and one of the special names.
    dstring nani = "nani";
    assert(isNumeric(refRange(&nani)));
}
5990
5991 /// isNumeric works with CTFE
@safe pure unittest
{
    // Initialising an enum forces the call to be evaluated at compile time.
    enum a = isNumeric("123.00E-5+1234.45E-12Li");
    enum b = isNumeric("12345xxxx890");

    static assert( a);
    static assert(!b);
}
6000
6001 @system unittest
6002 {
6003 import std.conv : to;
6004 import std.exception : assertCTFEable;
6005
6006 assertCTFEable!(
6007 {
6008 // Test the isNumeric(in string) function
6009 assert(isNumeric("1") == true );
6010 assert(isNumeric("1.0") == true );
6011 assert(isNumeric("1e-1") == true );
6012 assert(isNumeric("12345xxxx890") == false );
6013 assert(isNumeric("567L") == true );
6014 assert(isNumeric("23UL") == true );
6015 assert(isNumeric("-123..56f") == false );
6016 assert(isNumeric("12.3.5.6") == false );
6017 assert(isNumeric(" 12.356") == false );
6018 assert(isNumeric("123 5.6") == false );
6019 assert(isNumeric("1233E-1+1.0e-1i") == true );
6020
6021 assert(isNumeric("123.00E-5+1234.45E-12Li") == true);
6022 assert(isNumeric("123.00e-5+1234.45E-12iL") == false);
6023 assert(isNumeric("123.00e-5+1234.45e-12uL") == false);
6024 assert(isNumeric("123.00E-5+1234.45e-12lu") == false);
6025
6026 assert(isNumeric("123fi") == true);
6027 assert(isNumeric("123li") == true);
6028 assert(isNumeric("--123L") == false);
6029 assert(isNumeric("+123.5UL") == false);
6030 assert(isNumeric("123f") == true);
6031 assert(isNumeric("123.u") == false);
6032
6033 // @@@BUG@@ to!string(float) is not CTFEable.
6034 // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
6035 if (!__ctfe)
6036 {
6037 assert(isNumeric(to!string(real.nan)) == true);
6038 assert(isNumeric(to!string(-real.infinity)) == true);
6039 assert(isNumeric(to!string(123e+2+1234.78Li)) == true);
6040 }
6041
6042 string s = "$250.99-";
6043 assert(isNumeric(s[1 .. s.length - 2]) == true);
6044 assert(isNumeric(s) == false);
6045 assert(isNumeric(s[0 .. s.length - 1]) == false);
6046 });
6047
6048 assert(!isNumeric("-"));
6049 assert(!isNumeric("+"));
6050 }
6051
6052 /*****************************
6053 * Soundex algorithm.
6054 *
6055 * The Soundex algorithm converts a word into 4 characters
6056 * based on how the word sounds phonetically. The idea is that
6057 * two spellings that sound alike will have the same Soundex
6058 * value, which means that Soundex can be used for fuzzy matching
6059 * of names.
6060 *
6061 * Params:
6062 * str = String or InputRange to convert to Soundex representation.
6063 *
6064 * Returns:
6065 * The four character array with the Soundex result in it.
* The array is filled with zero bytes if there is no Soundex representation for the string.
6067 *
6068 * See_Also:
6069 * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
6070 * $(LUCKY The Soundex Indexing System)
6071 * $(LREF soundex)
6072 *
6073 * Bugs:
6074 * Only works well with English names.
6075 * There are other arguably better Soundex algorithms,
6076 * but this one is the standard one.
6077 */
char[4] soundexer(Range)(Range str)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias Unit = Unqual!(ElementEncodingType!Range);

    // Soundex digit for each upper case letter; '0' marks letters that
    // never contribute a digit.
    static immutable codes =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
          "01230120022455012623010202";

    char[4] code = void;    // every element is written before returning
    size_t filled = 0;      // number of result characters produced so far
    Unit prev;              // digit of the previous letter, or Unit.init for "none"

    foreach (Unit ch; str)
    {
        immutable isLower = ch >= 'a' && ch <= 'z';
        immutable isUpper = ch >= 'A' && ch <= 'Z';

        // Anything that is not an ASCII letter is skipped and separates
        // otherwise adjacent equal digits.
        if (!isLower && !isUpper)
        {
            prev = Unit.init;
            continue;
        }
        if (isLower)
            ch -= 'a' - 'A';    // fold to upper case

        // The first letter is stored as is; its digit is remembered so
        // that an immediately following letter with the same digit is dropped.
        if (filled == 0)
        {
            code[filled++] = cast(char) ch;
            prev = codes[ch - 'A'];
            continue;
        }

        // H and W are ignored entirely: they do not separate equal digits.
        if (ch == 'H' || ch == 'W')
            continue;
        // Vowels produce no digit but do separate equal digits.
        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U')
            prev = Unit.init;

        immutable digit = codes[ch - 'A'];
        if (digit != '0' && digit != prev)
        {
            code[filled++] = digit;
            prev = digit;
        }
        if (filled == 4)
            return code;
    }

    // No letters at all: all zero bytes. Otherwise pad with the character '0'.
    if (filled == 0)
        code[] = 0;
    else
        code[filled .. 4] = '0';
    return code;
}
6133
char[4] soundexer(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Arguments that merely convert to a string are forwarded to the range
    // overload, instantiated with the string type they convert to.
    alias Str = StringTypeOf!Range;
    return soundexer!Str(str);
}
6139
6140 /*****************************
6141 * Like $(LREF soundexer), but with different parameters
6142 * and return value.
6143 *
6144 * Params:
6145 * str = String to convert to Soundex representation.
6146 * buffer = Optional 4 char array to put the resulting Soundex
6147 * characters into. If null, the return value
6148 * buffer will be allocated on the heap.
6149 * Returns:
6150 * The four character array with the Soundex result in it.
6151 * Returns null if there is no Soundex representation for the string.
6152 * See_Also:
6153 * $(LREF soundexer)
6154 */
char[] soundex(const(char)[] str, char[] buffer = null)
    @safe pure nothrow
in
{
    // A caller-supplied buffer must be able to hold the four result characters.
    assert(buffer is null || buffer.length >= 4);
}
out (result)
{
    // A non-null result is one upper case letter followed by three digits 0-6.
    if (result !is null)
    {
        assert(result.length == 4);
        assert(result[0] >= 'A' && result[0] <= 'Z');
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6');
    }
}
body
{
    char[4] result = soundexer(str);
    // soundexer reports "no representation" with a zero first element.
    if (result[0] == 0)
        return null;
    if (buffer is null)
        buffer = new char[4];
    // The in contract admits buffers longer than four characters, but an
    // array copy requires equal lengths on both sides, so only the first
    // four elements are written and returned. For a buffer of exactly four
    // characters this is the whole buffer, as before.
    buffer[0 .. 4] = result[];
    return buffer[0 .. 4];
}
6181
6182
6183 @safe pure nothrow unittest
6184 {
6185 import std.exception : assertCTFEable;
6186 assertCTFEable!(
6187 {
6188 char[4] buffer;
6189
6190 assert(soundex(null) == null);
6191 assert(soundex("") == null);
6192 assert(soundex("0123^&^^**&^") == null);
6193 assert(soundex("Euler") == "E460");
6194 assert(soundex(" Ellery ") == "E460");
6195 assert(soundex("Gauss") == "G200");
6196 assert(soundex("Ghosh") == "G200");
6197 assert(soundex("Hilbert") == "H416");
6198 assert(soundex("Heilbronn") == "H416");
6199 assert(soundex("Knuth") == "K530");
6200 assert(soundex("Kant", buffer) == "K530");
6201 assert(soundex("Lloyd") == "L300");
6202 assert(soundex("Ladd") == "L300");
6203 assert(soundex("Lukasiewicz", buffer) == "L222");
6204 assert(soundex("Lissajous") == "L222");
6205 assert(soundex("Robert") == "R163");
6206 assert(soundex("Rupert") == "R163");
6207 assert(soundex("Rubin") == "R150");
6208 assert(soundex("Washington") == "W252");
6209 assert(soundex("Lee") == "L000");
6210 assert(soundex("Gutierrez") == "G362");
6211 assert(soundex("Pfister") == "P236");
6212 assert(soundex("Jackson") == "J250");
6213 assert(soundex("Tymczak") == "T522");
6214 assert(soundex("Ashcraft") == "A261");
6215
6216 assert(soundex("Woo") == "W000");
6217 assert(soundex("Pilgrim") == "P426");
6218 assert(soundex("Flingjingwaller") == "F452");
6219 assert(soundex("PEARSE") == "P620");
6220 assert(soundex("PIERCE") == "P620");
6221 assert(soundex("Price") == "P620");
6222 assert(soundex("CATHY") == "C300");
6223 assert(soundex("KATHY") == "K300");
6224 assert(soundex("Jones") == "J520");
6225 assert(soundex("johnsons") == "J525");
6226 assert(soundex("Hardin") == "H635");
6227 assert(soundex("Martinez") == "M635");
6228
6229 import std.utf : byChar, byDchar, byWchar;
6230 assert(soundexer("Martinez".byChar ) == "M635");
6231 assert(soundexer("Martinez".byWchar) == "M635");
6232 assert(soundexer("Martinez".byDchar) == "M635");
6233 });
6234 }
6235
@safe pure unittest
{
    // testAliasedString is a helper defined elsewhere in this module;
    // presumably it checks that soundexer also accepts types that only
    // convert to a string (the isConvertibleToString overload) - confirm
    // against the helper's definition.
    assert(testAliasedString!soundexer("Martinez"));
}
6240
6241
6242 /***************************************************
6243 * Construct an associative array consisting of all
6244 * abbreviations that uniquely map to the strings in values.
6245 *
6246 * This is useful in cases where the user is expected to type
6247 * in one of a known set of strings, and the program will helpfully
6248 * auto-complete the string once sufficient characters have been
6249 * entered that uniquely identify it.
6250 */
6251
string[string] abbrev(string[] values) @safe pure
{
    import std.algorithm.sorting : sort;
    import std.utf : stride;

    string[string] table;

    // Sort a private copy so the caller's array is left untouched. After
    // sorting, only the nearest distinct neighbours of a word can share a
    // prefix with it that no closer word shares.
    auto sorted = values.dup;
    sort(sorted);

    immutable count = sorted.length;
    bool havePrev = false;  // false only while the first distinct word is handled
    string prevWord;        // previous distinct word
    string nextWord;        // next distinct word, valid only while haveNext
    size_t next;

    for (size_t cur = 0; cur < count; cur = next)
    {
        string word = sorted[cur];

        // Step over duplicates of the current word.
        next = cur + 1;
        while (next < count && sorted[next] == word)
            ++next;
        immutable haveNext = next < count;
        if (haveNext)
            nextWord = sorted[next];

        // Try every proper prefix, growing by one code point at a time
        // (the empty prefix included).
        for (size_t len = 0; len < word.length; len += stride(word, len))
        {
            string prefix = word[0 .. len];

            immutable sharedWithNext = haveNext && len <= nextWord.length &&
                prefix == nextWord[0 .. len];
            immutable sharedWithPrev = havePrev && len <= prevWord.length &&
                prefix == prevWord[0 .. len];

            // A prefix is an abbreviation only if neither neighbour starts with it.
            if (!sharedWithNext && !sharedWithPrev)
                table[prefix] = word;
        }

        // The full word always maps to itself.
        table[word] = word;
        havePrev = true;
        prevWord = word;
    }

    return table;
}
6300
6301 ///
@safe unittest
{
    import std.string;

    static string[] list = [ "food", "foxy" ];
    auto abbrevs = abbrev(list);
    // "f" and "fo" are prefixes of both words and therefore absent; every
    // longer prefix, and each full word, maps to exactly one word.
    assert(abbrevs == ["fox": "foxy", "food": "food",
                       "foxy": "foxy", "foo": "food"]);
}
6311
6312
@system pure unittest
{
    import std.algorithm.sorting : sort;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Input with a duplicate ("hello" twice) and a word that is itself a
    // prefix of another ("he").
    string[] values;
    values ~= "hello";
    values ~= "hello";
    values ~= "he";

    string[string] r;

    r = abbrev(values);
    // Associative array order is unspecified, so compare sorted keys.
    auto keys = r.keys.dup;
    sort(keys);

    // "h" is ambiguous and must be missing; "he" is a full word.
    assert(keys.length == 4);
    assert(keys[0] == "he");
    assert(keys[1] == "hel");
    assert(keys[2] == "hell");
    assert(keys[3] == "hello");

    assert(r[keys[0]] == "he");
    assert(r[keys[1]] == "hello");
    assert(r[keys[2]] == "hello");
    assert(r[keys[3]] == "hello");
    });
}
6344
6345
6346 /******************************************
6347 * Compute _column number at the end of the printed form of the string,
6348 * assuming the string starts in the leftmost _column, which is numbered
6349 * starting from 0.
6350 *
6351 * Tab characters are expanded into enough spaces to bring the _column number
6352 * to the next multiple of tabsize.
6353 * If there are multiple lines in the string, the _column number of the last
6354 * line is returned.
6355 *
6356 * Params:
6357 * str = string or InputRange to be analyzed
6358 * tabsize = number of columns a tab character represents
6359 *
6360 * Returns:
6361 * column number
6362 */
6363
size_t column(Range)(Range str, in size_t tabsize = 8)
if ((isInputRange!Range && isSomeChar!(Unqual!(ElementEncodingType!Range)) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    static if (!is(Unqual!(ElementEncodingType!Range) == dchar))
    {
        // char and wchar are variable-width encodings: one code point may
        // span several code units (multi-byte UTF-8 sequences, UTF-16
        // surrogate pairs). Decode first so that each code point advances
        // the column exactly once, whatever the encoding of the input.
        import std.utf : byDchar;

        return str.byDchar.column(tabsize);
    }
    else
    {
        // Elements are whole code points already; no decoding is needed.
        import std.uni : lineSep, paraSep, nelSep;

        size_t column;

        foreach (const c; str)
        {
            switch (c)
            {
                case '\t':
                    // Advance to the next multiple of tabsize.
                    column = (column + tabsize) / tabsize * tabsize;
                    break;

                // Every kind of line break restarts the count.
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                default:
                    column++;
                    break;
            }
        }
        return column;
    }
}
6407
6408 ///
@safe pure unittest
{
    import std.utf : byChar, byWchar, byDchar;

    // Strings of every character width ...
    assert(column("1234 ") == 5);
    assert(column("1234 "w) == 5);
    assert(column("1234 "d) == 5);

    // ... and the corresponding character ranges.
    assert(column("1234 ".byChar()) == 5);
    assert(column("1234 "w.byWchar()) == 5);
    assert(column("1234 "d.byDchar()) == 5);

    // Tab stops are set at 8 spaces by default; tab characters insert enough
    // spaces to bring the column position to the next multiple of 8.
    assert(column("\t") == 8);
    assert(column("1\t") == 8);
    assert(column("\t1") == 9);
    assert(column("123\t") == 8);

    // Other tab widths are possible by specifying it explicitly:
    assert(column("\t", 4) == 4);
    assert(column("1\t", 4) == 4);
    assert(column("\t1", 4) == 5);
    assert(column("123\t", 4) == 4);

    // New lines reset the column number.
    assert(column("abc\n") == 0);
    assert(column("abc\n1") == 1);
    assert(column("abcdefg\r1234") == 4);
    assert(column("abc\u20281") == 1);
    assert(column("abc\u20291") == 1);
    assert(column("abc\u00851") == 1);
    // U+0086 is not a line break, so it counts as an ordinary character.
    assert(column("abc\u00861") == 5);
}
6443
size_t column(Range)(auto ref Range str, in size_t tabsize = 8)
if (isConvertibleToString!Range)
{
    // Arguments that merely convert to a string are forwarded to the range
    // overload, instantiated with the string type they convert to.
    alias Str = StringTypeOf!Range;
    return column!Str(str, tabsize);
}
6449
@safe pure unittest
{
    // testAliasedString is a helper defined elsewhere in this module;
    // presumably it checks that column also accepts types that only convert
    // to a string (the isConvertibleToString overload) - confirm against
    // the helper's definition.
    assert(testAliasedString!column("abc\u00861"));
}
6454
@safe @nogc unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // Being @nogc, this test also verifies that column does not allocate
    // for string arguments.
    assertCTFEable!(
    {
    // Null and empty strings end in column 0.
    assert(column(string.init) == 0);
    assert(column("") == 0);
    // A tab moves to the next multiple of 8, also from exactly a tab stop.
    assert(column("\t") == 8);
    assert(column("abc\t") == 8);
    assert(column("12345678\t") == 16);
    });
}
6469
6470 /******************************************
6471 * Wrap text into a paragraph.
6472 *
6473 * The input text string s is formed into a paragraph
6474 * by breaking it up into a sequence of lines, delineated
6475 * by \n, such that the number of columns is not exceeded
6476 * on each line.
6477 * The last line is terminated with a \n.
6478 * Params:
6479 * s = text string to be wrapped
6480 * columns = maximum number of _columns in the paragraph
6481 * firstindent = string used to _indent first line of the paragraph
6482 * indent = string to use to _indent following lines of the paragraph
6483 * tabsize = column spacing of tabs in firstindent[] and indent[]
6484 * Returns:
6485 * resulting paragraph as an allocated string
6486 */
6487
S wrap(S)(S s, in size_t columns = 80, S firstindent = null,
    S indent = null, in size_t tabsize = 8)
if (isSomeString!S)
{
    import std.uni : isWhite;

    typeof(s.dup) result;
    bool insideWord;
    bool firstWord = true;
    size_t wordBegin;   // index in s where the current word starts

    const indentcol = column(indent, tabsize);

    // Grow to the expected size once, then shrink back to just the first
    // indentation before filling it in.
    result.length = firstindent.length + s.length;
    result.length = firstindent.length;
    result[] = firstindent[];
    auto col = column(firstindent, tabsize);

    foreach (size_t i, dchar c; s)
    {
        if (!isWhite(c))
        {
            // Remember where a new word begins.
            if (!insideWord)
            {
                wordBegin = i;
                insideWord = true;
            }
            continue;
        }

        // Whitespace between words carries no information of its own.
        if (!insideWord)
            continue;

        // A word has just ended at index i. Its width is measured in code units.
        immutable wordLen = i - wordBegin;
        if (!firstWord)
        {
            if (col + 1 + wordLen > columns)
            {
                // Does not fit behind a separating space: start a new line.
                result ~= '\n';
                result ~= indent;
                col = indentcol;
            }
            else
            {
                result ~= ' ';
                col += 1;
            }
        }
        // The first word always goes directly after firstindent.
        result ~= s[wordBegin .. i];
        col += wordLen;
        insideWord = false;
        firstWord = false;
    }

    // A word that runs up to the end of s has not been emitted yet.
    if (insideWord)
    {
        auto tail = s[wordBegin .. s.length];
        // Note the >= here, as opposed to > inside the loop.
        if (col + 1 + tail.length >= columns)
        {
            result ~= '\n';
            result ~= indent;
        }
        else if (result.length != firstindent.length)
            result ~= ' ';
        result ~= tail;
    }
    result ~= '\n';

    return result;
}
6555
6556 ///
6557 @safe pure unittest
6558 {
6559 assert(wrap("a short string", 7) == "a short\nstring\n");
6560
6561 // wrap will not break inside of a word, but at the next space
6562 assert(wrap("a short string", 4) == "a\nshort\nstring\n");
6563
6564 assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n");
6565 assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n");
6566 }
6567
6568 @safe pure unittest
6569 {
6570 import std.conv : to;
6571 import std.exception : assertCTFEable;
6572
6573 assertCTFEable!(
6574 {
6575 assert(wrap(string.init) == "\n");
6576 assert(wrap(" a b df ") == "a b df\n");
6577 assert(wrap(" a b df ", 3) == "a b\ndf\n");
6578 assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n");
6579 assert(wrap(" abcd df ", 3) == "abcd\ndf\n");
6580 assert(wrap("x") == "x\n");
6581 assert(wrap("u u") == "u u\n");
6582 assert(wrap("abcd", 3) == "\nabcd\n");
6583 assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n");
6584 });
6585 }
6586
6587 /******************************************
6588 * Removes one level of indentation from a multi-line string.
6589 *
6590 * This uniformly outdents the text as much as possible.
6591 * Whitespace-only lines are always converted to blank lines.
6592 *
6593 * Does not allocate memory if it does not throw.
6594 *
6595 * Params:
6596 * str = multi-line string
6597 *
6598 * Returns:
6599 * outdented string
6600 *
6601 * Throws:
6602 * StringException if indentation is done with different sequences
6603 * of whitespace characters.
6604 */
S outdent(S)(S str) @safe pure
if (isSomeString!S)
{
    // Split into lines that keep their terminators, outdent them with the
    // array overload, then glue the lines back together.
    auto lines = str.splitLines(Yes.keepTerminator);
    return lines.outdent().join();
}
6610
6611 ///
6612 @safe pure unittest
6613 {
6614 enum pretty = q{
6615 import std.stdio;
6616 void main() {
6617 writeln("Hello");
6618 }
6619 }.outdent();
6620
6621 enum ugly = q{
6622 import std.stdio;
6623 void main() {
6624 writeln("Hello");
6625 }
6626 };
6627
6628 assert(pretty == ugly);
6629 }
6630
6631
6632 /******************************************
6633 * Removes one level of indentation from an array of single-line strings.
6634 *
6635 * This uniformly outdents the text as much as possible.
6636 * Whitespace-only lines are always converted to blank lines.
6637 *
6638 * Params:
6639 * lines = array of single-line strings
6640 *
6641 * Returns:
6642 * lines[] is rewritten in place with outdented lines
6643 *
6644 * Throws:
6645 * StringException if indentation is done with different sequences
6646 * of whitespace characters.
6647 */
S[] outdent(S)(S[] lines) @safe pure
if (isSomeString!S)
{
    import std.algorithm.searching : startsWith;

    if (lines.empty)
        return null;

    // Pass 1: blank out whitespace-only lines and find the shortest leading
    // whitespace among all other lines.
    S commonIndent;
    foreach (ref line; lines)
    {
        const rest = line.stripLeft();

        if (rest.empty)
        {
            // Whitespace-only line: keep nothing but its line terminator.
            line = line[line.chomp().length .. $];
            continue;
        }

        // Everything in front of the first non-whitespace character.
        S lead = line[0 .. $ - rest.length];

        // Comparing number of code units instead of code points is OK here
        // because this function throws upon inconsistent indentation.
        if (commonIndent is null || lead.length < commonIndent.length)
        {
            // A line without indentation means there is nothing to remove;
            // return right away.
            if (lead.empty)
                return lines;
            commonIndent = lead;
        }
    }

    // Pass 2: cut the common indentation off every line that has content.
    foreach (ref line; lines)
    {
        if (line.stripLeft().empty)
            continue;

        if (!line.startsWith(commonIndent))
            throw new StringException("outdent: Inconsistent indentation");

        line = line[commonIndent.length .. $];
    }

    return lines;
}
6707
// Checks outdent on single strings of every string width.  Each case is
// asserted twice: once at run time and once through `static assert`, i.e.
// during compile-time evaluation.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    // NOTE(review): outdent_testStr and outdent_expected are not referenced
    // anywhere in this unittest; testStr3/expected3 below repeat the same
    // literals.  `std.conv.to` also appears unused here -- confirm before
    // removing.
    template outdent_testStr(S)
    {
        enum S outdent_testStr =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";
    }

    template outdent_expected(S)
    {
        enum S outdent_expected =
"
\tX
\U00010143X


\t\tX
";
    }

    assertCTFEable!(
    {

    foreach (S; AliasSeq!(string, wstring, dstring))
    {
        // An empty string comes back unchanged.
        enum S blank = "";
        assert(blank.outdent() == blank);
        static assert(blank.outdent() == blank);

        // Input made only of whitespace-only lines: just the newlines remain.
        enum S testStr1 = " \n \t\n ";
        enum S expected1 = "\n\n";
        assert(testStr1.outdent() == expected1);
        static assert(testStr1.outdent() == expected1);

        // Same input without its final character gives the same result.
        assert(testStr1[0..$-1].outdent() == expected1);
        static assert(testStr1[0..$-1].outdent() == expected1);

        // Non-blank lines with no indentation: the text is returned as is,
        // including the whitespace-only line in the middle.
        enum S testStr2 = "a\n \t\nb";
        assert(testStr2.outdent() == testStr2);
        static assert(testStr2.outdent() == testStr2);

        // Lines indented to different depths; one of them starts its text
        // with U+10143, a code point outside the BMP.
        enum S testStr3 =
"
\t\tX
\t\U00010143X
\t\t

\t\t\tX
\t ";

        enum S expected3 =
"
\tX
\U00010143X


\t\tX
";
        assert(testStr3.outdent() == expected3);
        static assert(testStr3.outdent() == expected3);

        // NOTE(review): testStr4 .. expected7 are declared without the `S`
        // type, so they are plain `string` in every iteration of the loop;
        // confirm whether `enum S` was intended.

        // Indented lines separated by \r, \n, \r\n, U+2028 and U+2029.
        enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X";
        enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
        assert(testStr4.outdent() == expected4);
        static assert(testStr4.outdent() == expected4);

        // As above with the final 'X' cut off, so the last line holds only
        // whitespace.
        enum testStr5 = testStr4[0..$-1];
        enum expected5 = expected4[0..$-1];
        assert(testStr5.outdent() == expected5);
        static assert(testStr5.outdent() == expected5);

        // Whitespace-only lines ended by each of the separators above.
        enum testStr6 = " \r \n \r\n \u2028 \u2029";
        enum expected6 = "\r\n\r\n\u2028\u2029";
        assert(testStr6.outdent() == expected6);
        static assert(testStr6.outdent() == expected6);

        // Whitespace after the text of a line is kept.
        enum testStr7 = " a \n b ";
        enum expected7 = "a \nb ";
        assert(testStr7.outdent() == expected7);
        static assert(testStr7.outdent() == expected7);
    }
    });
}
6800
@safe pure unittest
{
    import std.exception : assertThrown;

    // The second line is indented with a tab while the others use a space,
    // so no single indentation prefix fits every line.
    string mixedIndent = " a\n\tb\n c";
    assertThrown!StringException(outdent(mixedIndent));
}
6807
/** Reinterprets an array of integral code units as a UTF string, on the
assumption that $(D arr) already holds well-formed UTF.

The element type is chosen by width: $(D ubyte) becomes $(D char),
$(D ushort) becomes $(D wchar) and $(D uint) becomes $(D dchar). Type
qualifiers on the element type are preserved.

When compiled in debug mode, the retyped array is additionally passed to
$(D std.utf.validate) as an extra check that it is a valid Unicode string.

Params:
    arr = array of ubytes, ushorts, or uints

Returns:
    arr retyped as an array of chars, wchars, or dchars

See_Also: $(LREF representation)
*/
auto assumeUTF(T)(T[] arr) pure
if (staticIndexOf!(Unqual!T, ubyte, ushort, uint) != -1)
{
    import std.traits : ModifyTypePreservingTQ;
    import std.utf : validate;

    // Element sizes 1, 2 and 4 divided by two give the indices 0, 1 and 2.
    alias CodeUnitTypes = AliasSeq!(char, wchar, dchar);
    alias CharFor(U) = CodeUnitTypes[U.sizeof / 2];

    // Same array type as the argument, with the element swapped for the
    // matching character type and its qualifiers kept.
    alias Retyped = ModifyTypePreservingTQ!(CharFor, T)[];

    auto retyped = cast(Retyped) arr;
    debug validate(retyped);
    return retyped;
}
6835
///
@safe pure unittest
{
    // Round trip: take the string's representation as immutable ubytes, then
    // retype those ubytes as a string again.
    string text = "Hölo World";
    immutable(ubyte)[] rawBytes = text.representation;
    string roundTripped = assumeUTF(rawBytes);

    assert(text == roundTripped);
}
6845
pure @system unittest
{
    import std.algorithm.comparison : equal;

    // Character array types paired by position with the integral types
    // they are cast to in this test.
    alias CharArrays = AliasSeq!(char[], wchar[], dchar[]);
    alias CodeUnits = AliasSeq!(ubyte, ushort, uint);

    foreach (i, Chars; CharArrays)
    {
        alias Unit = CodeUnits[i];

        immutable Chars original = "Hello World";
        Chars text = original.dup;

        // Mutable, const and immutable views of the same code units.
        auto raw = cast(Unit[]) text;
        auto rawConst = cast(const(Unit)[]) text;
        auto rawImmutable = cast(immutable(Unit)[]) text;

        auto fromMutable = assumeUTF(raw);
        auto fromConst = assumeUTF(rawConst);
        auto fromImmutable = assumeUTF(rawImmutable);

        // Each retyped array must hold the same characters as `text`.
        assert(equal(text, fromMutable));
        assert(equal(text, fromConst));
        assert(equal(text, fromImmutable));
    }
}
6881