1 // Written in the D programming language.
2 
3 /++
4     Functions which operate on ASCII characters.
5 
6     All of the functions in std._ascii accept Unicode characters but
7     effectively ignore them if they're not ASCII. All $(D isX) functions return
8     $(D false) for non-ASCII characters, and all $(D toX) functions do nothing
9     to non-ASCII characters.
10 
11     For functions which operate on Unicode characters, see
12     $(MREF std, uni).
13 
14 $(SCRIPT inhibitQuickIndex = 1;)
15 $(DIVC quickindex,
16 $(BOOKTABLE,
17 $(TR $(TH Category) $(TH Functions))
18 $(TR $(TD Validation) $(TD
19         $(LREF isAlpha)
20         $(LREF isAlphaNum)
21         $(LREF isASCII)
22         $(LREF isControl)
23         $(LREF isDigit)
24         $(LREF isGraphical)
        $(LREF isHexDigit)
        $(LREF isLower)
26         $(LREF isOctalDigit)
27         $(LREF isPrintable)
28         $(LREF isPunctuation)
29         $(LREF isUpper)
30         $(LREF isWhite)
31 ))
32 $(TR $(TD Conversions) $(TD
33         $(LREF toLower)
34         $(LREF toUpper)
35 ))
36 $(TR $(TD Constants) $(TD
37         $(LREF digits)
38         $(LREF fullHexDigits)
39         $(LREF hexDigits)
40         $(LREF letters)
41         $(LREF lowercase)
42         $(LREF lowerHexDigits)
43         $(LREF newline)
44         $(LREF octalDigits)
45         $(LREF uppercase)
46         $(LREF whitespace)
47 ))
48 $(TR $(TD Enums) $(TD
49         $(LREF LetterCase)
50 ))
51 ))
52     References:
53         $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
54         $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)
55 
56     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
57     Authors:   $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
58     Source:    $(PHOBOSSRC std/_ascii.d)
59   +/
60 module std.ascii;
61 
// Imports needed only by the unittest blocks of this module; they are
// compiled in solely when unit tests are enabled.
version (unittest)
{
    // FIXME: When dmd bug #314 is fixed, make these selective.
    import std.meta; // : AliasSeq;
    import std.range; // : chain;
    // The final unittest of this module also relies on OriginalType.
    import std.traits; // : functionAttributes, FunctionAttribute, isSafe;
}
69 
70 
/// Every hexadecimal digit, uppercase letters first: 0 .. 9, A .. F, a .. f
immutable string fullHexDigits  = "0123456789ABCDEFabcdef";

/// Uppercase hexadecimal digits: 0 .. 9, A .. F (a slice of $(D fullHexDigits))
immutable string hexDigits      = fullHexDigits[0 .. 16];

/// Lowercase hexadecimal digits: 0 .. 9, a .. f
immutable string lowerHexDigits = "0123456789abcdef";

/// Decimal digits: 0 .. 9 (a slice of $(D hexDigits))
immutable string digits         = hexDigits[0 .. 10];

/// Octal digits: 0 .. 7 (a slice of $(D digits))
immutable string octalDigits    = digits[0 .. 8];

/// All ASCII letters, uppercase first: A .. Z, a .. z
immutable string letters        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Uppercase ASCII letters: A .. Z (a slice of $(D letters))
immutable string uppercase      = letters[0 .. 26];

/// Lowercase ASCII letters: a .. z (a slice of $(D letters))
immutable string lowercase      = letters[26 .. 52];

/// ASCII _whitespace: space, tab, vertical tab, carriage return, linefeed, form feed
immutable string whitespace     = " \t\v\r\n\f";
80 
/++
    Letter case specifier.

    The enum is backed by $(D bool): $(D upper) is $(D false) and
    $(D lower) is $(D true).
  +/
enum LetterCase : bool
{
    /// Upper case letters
    upper = false,

    /// Lower case letters
    lower = true
}
89 
///
@safe unittest
{
    import std.conv : to;

    // The case specifier picks the letters used for digits above 9.
    immutable upperHex = to!string(42, 16, LetterCase.upper);
    immutable lowerHex = to!string(42, 16, LetterCase.lower);

    assert(upperHex == "2A");
    assert(lowerHex == "2a");
}

///
@system unittest
{
    import std.digest.hmac : hmac;
    import std.digest.digest : toHexString;
    import std.digest.sha : SHA1;
    import std.string : representation;

    auto message = "A very long phrase".representation;
    auto secret = "secret".representation;

    // Render the SHA-1 HMAC using lowercase hexadecimal digits.
    const sha1HMAC = hmac!SHA1(message, secret).toHexString!(LetterCase.lower);
    assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}
112 
/++
    Newline sequence for this system: carriage return followed by linefeed on
    Windows, a single linefeed on Posix. Any other platform is rejected at
    compile time.
  +/
version (Windows)
    immutable string newline = "\r\n";
else version (Posix)
    immutable string newline = "\n";
else
    static assert(0, "Unsupported OS");
120 
121 
/++
    Tests whether a character is an ASCII letter or decimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is one of 0 .. 9, A .. Z or a .. z, and
    $(D false) for everything else, including all non-ASCII characters.
  +/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // Anything outside the window '0' .. 'z' can be rejected immediately.
    if (c < '0' || c > 'z')
        return false;

    // Inside the window only the digit, lowercase and uppercase runs qualify.
    return c <= '9' || c >= 'a' || ('A' <= c && c <= 'Z');
}
130 
///
@safe pure nothrow @nogc unittest
{
    // Letters and digits are alphanumeric; punctuation is not.
    assert( isAlphaNum('A'));
    assert( isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: does not return true for non-ASCII Unicode alphanumerics:
    assert(!isAlphaNum('á'));
}

@safe unittest
{
    // Every digit and letter constant consists solely of alphanumerics.
    foreach (ch; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
    {
        assert(isAlphaNum(ch));
    }

    // No whitespace character is alphanumeric.
    foreach (ch; whitespace)
    {
        assert(!isAlphaNum(ch));
    }
}
150 
151 
/++
    Tests whether a character is an ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in A .. Z or a .. z, $(D false)
    otherwise.
  +/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Uppercase run first, then the lowercase run.
    if ('A' <= c && c <= 'Z')
        return true;

    return 'a' <= c && c <= 'z';
}
161 
///
@safe pure nothrow @nogc unittest
{
    // Only letters qualify; digits and punctuation do not.
    assert( isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: does not return true for non-ASCII Unicode alphabetic characters:
    assert(!isAlpha('á'));
}

@safe unittest
{
    // All letter constants are alphabetic.
    foreach (ch; chain(letters, lowercase, uppercase))
    {
        assert(isAlpha(ch));
    }

    // Digits and whitespace are not.
    foreach (ch; chain(digits, octalDigits, whitespace))
    {
        assert(!isAlpha(ch));
    }
}
181 
182 
/++
    Tests whether a character is a lowercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in a .. z, $(D false) otherwise.
  +/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below 'a' or above 'z'.
    return !(c < 'a' || c > 'z');
}
191 
///
@safe pure nothrow @nogc unittest
{
    // Only lowercase ASCII letters qualify.
    assert( isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: does not return true for non-ASCII Unicode lowercase letters
    assert(!isLower('á'));
    assert(!isLower('Á'));
}

@safe unittest
{
    // Every character of the lowercase constant is lowercase.
    foreach (ch; lowercase)
    {
        assert(isLower(ch));
    }

    // Digits, uppercase letters and whitespace are not.
    foreach (ch; chain(digits, uppercase, whitespace))
    {
        assert(!isLower(ch));
    }
}
212 
213 
/++
    Tests whether a character is an uppercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in A .. Z, $(D false) otherwise.
  +/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below 'A' or above 'Z'.
    return !(c < 'A' || c > 'Z');
}
222 
///
@safe pure nothrow @nogc unittest
{
    // Only uppercase ASCII letters qualify.
    assert( isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: does not return true for non-ASCII Unicode uppercase letters
    assert(!isUpper('á'));
    assert(!isUpper('Á'));
}

@safe unittest
{
    // Every character of the uppercase constant is uppercase.
    foreach (ch; uppercase)
    {
        assert(isUpper(ch));
    }

    // Digits, lowercase letters and whitespace are not.
    foreach (ch; chain(digits, lowercase, whitespace))
    {
        assert(!isUpper(ch));
    }
}
243 
244 
/++
    Tests whether a character is an ASCII decimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in 0 .. 9, $(D false) otherwise.
  +/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below '0' or above '9'.
    return !(c < '0' || c > '9');
}
253 
///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The characters are spelled as escapes so that the literals survive
    // any re-encoding of this source file.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    // Every character of the digits constant is a digit.
    foreach (c; digits)
        assert(isDigit(c));

    // Letters and whitespace are not.
    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}
275 
276 
/++
    Tests whether a character is a base 8 digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) lies in 0 .. 7, $(D false) otherwise.
  +/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below '0' or above '7'.
    return !(c < '0' || c > '7');
}
285 
///
@safe pure nothrow @nogc unittest
{
    // 0 .. 7 are octal digits; 8, letters and punctuation are not.
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}

@safe unittest
{
    // Every character of the octalDigits constant is an octal digit.
    foreach (ch; octalDigits)
    {
        assert(isOctalDigit(ch));
    }

    // Letters, the two remaining decimal digits and whitespace are not.
    foreach (ch; chain(letters, ['8', '9'], whitespace))
    {
        assert(!isOctalDigit(ch));
    }
}
304 
305 
/++
    Tests whether a character is a base 16 digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is one of 0 .. 9, A .. F or a .. f,
    $(D false) otherwise.
  +/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Anything outside the window '0' .. 'f' can be rejected immediately.
    if (c < '0' || c > 'f')
        return false;

    // Inside the window only 0 .. 9, a .. f and A .. F qualify.
    return c <= '9' || c >= 'a' || ('A' <= c && c <= 'F');
}
314 
///
@safe pure nothrow @nogc unittest
{
    // Digits and the letters A .. F are accepted in either case.
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted

    // Letters past F and punctuation are rejected.
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}

@safe unittest
{
    // Every character of the fullHexDigits constant is a hex digit.
    foreach (ch; fullHexDigits)
    {
        assert(isHexDigit(ch));
    }

    // Letters from g/G onwards and whitespace are not.
    foreach (ch; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
    {
        assert(!isHexDigit(ch));
    }
}
334 
335 
/++
    Tests whether a character is ASCII whitespace.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is the space, tab, linefeed, vertical tab,
    form feed or carriage return character, $(D false) otherwise.
  +/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        // Space plus the contiguous run 0x09 .. 0x0D.
        case ' ', '\t', '\n', '\v', '\f', '\r':
            return true;

        default:
            return false;
    }
}
346 
///
@safe pure nothrow @nogc unittest
{
    // Space, tab and newline are whitespace.
    assert( isWhite(' '));
    assert( isWhite('\t'));
    assert( isWhite('\n'));

    // Digits, letters and punctuation are not.
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    assert(std.uni.isWhite('\u00A0'));
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
}

@safe unittest
{
    // Every character of the whitespace constant is whitespace.
    foreach (ch; whitespace)
    {
        assert(isWhite(ch));
    }

    // Digits and letters are not.
    foreach (ch; chain(digits, letters))
    {
        assert(!isWhite(ch));
    }
}
371 
372 
/++
    Tests whether a character is an ASCII control character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is below 0x20 or equal to 0x7F,
    $(D false) otherwise.
  +/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    // 0x7F sits apart from the contiguous run 0x00 .. 0x1F.
    if (c == 0x7F)
        return true;

    return c < 0x20;
}
381 
///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\0'));
    assert( isControl('\022'));
    assert( isControl('\n')); // newline is both whitespace and control

    // Space, digits, letters and punctuation are not control characters.
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}

@safe unittest
{
    // Code points 0 .. 31 and 127 are the control characters.
    foreach (dchar ch; 0 .. 32)
    {
        assert(isControl(ch));
    }
    assert(isControl(127));

    // Digits, letters and the space are not.
    foreach (ch; chain(digits, letters, [' ']))
    {
        assert(!isControl(ch));
    }
}
408 
409 
/++
    Tests whether a character is ASCII punctuation.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is an ASCII character that is not a control
    character, letter, digit or whitespace; $(D false) otherwise.
  +/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // Only '!' .. '~' can contain punctuation at all.
    if (c < '!' || c > '~')
        return false;

    // Within that window, whatever is not alphanumeric is punctuation.
    return !isAlphaNum(c);
}
420 
///
@safe pure nothrow @nogc unittest
{
    // A selection of punctuation characters, including both range ends.
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    // Alphanumerics, whitespace and control characters are rejected.
    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = en-dash)
}

@safe unittest
{
    // Across all of ASCII, punctuation is exactly what is neither a control
    // character, an alphanumeric, nor the space.
    foreach (dchar ch; 0 .. 128)
    {
        immutable excluded = isControl(ch) || isAlphaNum(ch) || ch == ' ';
        assert(isPunctuation(ch) == !excluded);
    }
}
453 
454 
/++
    Tests whether a character is a visible ASCII character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a printable character other than the
    space character, i.e. lies in ! .. ~; $(D false) otherwise.
  +/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below '!' or above '~'.
    return !(c < '!' || c > '~');
}
464 
///
@safe pure nothrow @nogc unittest
{
    // Digits, letters and punctuation are graphical.
    assert( isGraphical('1'));
    assert( isGraphical('a'));
    assert( isGraphical('#'));

    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}

@safe unittest
{
    // Across all of ASCII, graphical means neither control nor space.
    foreach (dchar ch; 0 .. 128)
    {
        immutable excluded = isControl(ch) || ch == ' ';
        assert(isGraphical(ch) == !excluded);
    }
}
489 
490 
/++
    Tests whether a character is a printable ASCII character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a printable character, including the
    space character, i.e. lies in space .. ~; $(D false) otherwise.
  +/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    // Reject anything below the space or above '~'.
    return !(c < ' ' || c > '~');
}
500 
///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable(' '));  // whitespace is printable
    assert( isPrintable('1'));
    assert( isPrintable('a'));
    assert( isPrintable('#'));

    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}

@safe unittest
{
    // Across all of ASCII, printable is the exact opposite of control.
    foreach (dchar ch; 0 .. 128)
    {
        assert(isPrintable(ch) == !isControl(ch));
    }
}
524 
525 
/++
    Tests whether a character belongs to the ASCII character set.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range 0 .. 0x7F, $(D false)
    otherwise.
  +/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    // 0x80 is the first code point past the ASCII range.
    return c < 0x80;
}
536 
///
@safe pure nothrow @nogc unittest
{
    // Plain letters are ASCII; accented letters are not.
    assert( isASCII('a'));
    assert(!isASCII('á'));
}

@safe unittest
{
    // Code points 0 .. 127 are ASCII.
    foreach (dchar ch; 0 .. 128)
    {
        assert(isASCII(ch));
    }

    // The first code point past that range is not.
    assert(!isASCII(128));
}
551 
552 
/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).

    Returns: The corresponding lowercase letter if $(D c) is an uppercase
    ASCII character, otherwise $(D c) itself. When $(D C) is a built-in type,
    or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)); when it is a user-defined type, the result
    is a $(D dchar).
  +/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Aggregates are returned as dchar; everything else keeps its
    // unqualified underlying character type.
    static if (isAggregateType!(OriginalType!C))
        alias Result = dchar;
    else
        alias Result = Unqual!(OriginalType!C);

    // Anything that is not an uppercase ASCII letter passes through.
    if (!isUpper(c))
        return cast(Result) c;

    // Shift by the fixed distance between the two letter runs.
    return cast(Result)(cast(Result) c + 'a' - 'A');
}
577 
///
@safe pure nothrow @nogc unittest
{
    assert(toLower('a') == 'a');
    assert(toLower('A') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted.
    assert(toLower('Á') == 'Á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Each uppercase letter maps to the lowercase letter at the same index.
        foreach (i, c; uppercase)
            assert(toLower(cast(C) c) == lowercase[i]);

        // Within ASCII, only A .. Z may change.
        foreach (C c; 0 .. 128)
        {
            if (c < 'A' || c > 'Z')
                assert(toLower(c) == c);
            else
                assert(toLower(c) != c);
        }

        // Everything above ASCII is passed through unchanged. The range is
        // half-open, so the largest value is checked separately.
        foreach (C c; 128 .. C.max)
            assert(toLower(c) == c);
        assert(toLower(C.max) == C.max);

        //CTFE
        static assert(toLower(cast(C)'a') == 'a');
        static assert(toLower(cast(C)'A') == 'a');
    }
}
613 
614 
/++
    Converts an ASCII letter to uppercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).

    Returns: The corresponding uppercase letter if $(D c) is a lowercase
    ASCII character, otherwise $(D c) itself. When $(D C) is a built-in type,
    or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)); when it is a user-defined type, the result
    is a $(D dchar).
  +/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Aggregates are returned as dchar; everything else keeps its
    // unqualified underlying character type.
    static if (isAggregateType!(OriginalType!C))
        alias Result = dchar;
    else
        alias Result = Unqual!(OriginalType!C);

    // Anything that is not a lowercase ASCII letter passes through.
    if (!isLower(c))
        return cast(Result) c;

    // Shift by the fixed distance between the two letter runs.
    return cast(Result)(cast(Result) c - ('a' - 'A'));
}
639 
///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted.
    assert(toUpper('á') == 'á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        // Each lowercase letter maps to the uppercase letter at the same index.
        foreach (i, c; lowercase)
            assert(toUpper(cast(C) c) == uppercase[i]);

        // Within ASCII, only a .. z may change.
        foreach (C c; 0 .. 128)
        {
            if (c < 'a' || c > 'z')
                assert(toUpper(c) == c);
            else
                assert(toUpper(c) != c);
        }

        // Everything above ASCII is passed through unchanged. The range is
        // half-open, so the largest value is checked separately.
        foreach (C c; 128 .. C.max)
            assert(toUpper(c) == c);
        assert(toUpper(C.max) == C.max);

        //CTFE
        static assert(toUpper(cast(C)'a') == 'A');
        static assert(toUpper(cast(C)'A') == 'A');
    }
}
674 
675 
// Exercises toUpper and toLower with user-defined types and enums.
@safe unittest
{
    // Structs that convert implicitly to char, wchar and dchar.
    static struct UDC {  char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }

    // Enums whose base type is a built-in character type.
    enum CE :  char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}

    // Enums whose base type is one of the structs above.
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    // Structs: conversion works at run time and in CTFE.
    foreach (Wrapper; AliasSeq!(UDC, UDW, UDD))
    {
        assert(toLower(Wrapper('a')) == 'a');
        assert(toLower(Wrapper('A')) == 'a');

        static assert(toLower(Wrapper('a')) == 'a');
        static assert(toLower(Wrapper('A')) == 'a');
        static assert(toUpper(Wrapper('a')) == 'A');
        static assert(toUpper(Wrapper('A')) == 'A');
    }

    // Enums of either flavour: conversion works at run time and in CTFE.
    foreach (E; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(E.a) == 'a');
        assert(toLower(E.A) == 'a');
        assert(toUpper(E.a) == 'A');
        assert(toUpper(E.A) == 'A');

        static assert(toLower(E.a) == 'a');
        static assert(toLower(E.A) == 'a');
        static assert(toUpper(E.a) == 'A');
        static assert(toUpper(E.A) == 'A');
    }

    // An enum of a built-in character type yields that built-in type.
    foreach (E; AliasSeq!(CE, WE, DE))
    {
        static assert(is(typeof(toLower(E.init)) == OriginalType!E));
        static assert(is(typeof(toUpper(E.init)) == OriginalType!E));
    }

    // A struct, or an enum of a struct, yields dchar.
    foreach (U; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(U.init)) == dchar));
        static assert(is(typeof(toUpper(U.init)) == dchar));
    }
}
730