1 // Written in the D programming language.
2
3 /++
4 Functions which operate on ASCII characters.
5
6 All of the functions in std._ascii accept Unicode characters but
7 effectively ignore them if they're not ASCII. All $(D isX) functions return
8 $(D false) for non-ASCII characters, and all $(D toX) functions do nothing
9 to non-ASCII characters.
10
11 For functions which operate on Unicode characters, see
12 $(MREF std, uni).
13
14 $(SCRIPT inhibitQuickIndex = 1;)
15 $(DIVC quickindex,
16 $(BOOKTABLE,
17 $(TR $(TH Category) $(TH Functions))
18 $(TR $(TD Validation) $(TD
19 $(LREF isAlpha)
20 $(LREF isAlphaNum)
21 $(LREF isASCII)
22 $(LREF isControl)
23 $(LREF isDigit)
24 $(LREF isGraphical)
$(LREF isHexDigit)
$(LREF isLower)
26 $(LREF isOctalDigit)
27 $(LREF isPrintable)
28 $(LREF isPunctuation)
29 $(LREF isUpper)
30 $(LREF isWhite)
31 ))
32 $(TR $(TD Conversions) $(TD
33 $(LREF toLower)
34 $(LREF toUpper)
35 ))
36 $(TR $(TD Constants) $(TD
37 $(LREF digits)
38 $(LREF fullHexDigits)
39 $(LREF hexDigits)
40 $(LREF letters)
41 $(LREF lowercase)
42 $(LREF lowerHexDigits)
43 $(LREF newline)
44 $(LREF octalDigits)
45 $(LREF uppercase)
46 $(LREF whitespace)
47 ))
48 $(TR $(TD Enums) $(TD
49 $(LREF LetterCase)
50 ))
51 ))
52 References:
53 $(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
54 $(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)
55
56 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
57 Authors: $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
58 Source: $(PHOBOSSRC std/_ascii.d)
59 +/
60 module std.ascii;
61
// Imports that are only needed by the unit tests in this module.
version (unittest)
{
    // FIXME: When dmd bug #314 is fixed, make these selective.
    import std.meta; // : AliasSeq;
    import std.range; // : chain;
    import std.traits; // : functionAttributes, FunctionAttribute, isSafe;
}
69
70
immutable fullHexDigits = "0123456789ABCDEFabcdef"; /// All hexadecimal digits: 0 .. 9, A .. F, a .. f
immutable hexDigits = fullHexDigits[0 .. 16]; /// Uppercase hexadecimal digits: 0 .. 9, A .. F (the first 16 characters of $(D fullHexDigits))
immutable lowerHexDigits = "0123456789abcdef"; /// Lowercase hexadecimal digits: 0 .. 9, a .. f
immutable digits = hexDigits[0 .. 10]; /// Decimal digits: 0 .. 9 (the first 10 characters of $(D hexDigits))
immutable octalDigits = digits[0 .. 8]; /// Octal digits: 0 .. 7 (the first 8 characters of $(D digits))
immutable letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; /// All ASCII letters: A .. Z followed by a .. z
immutable uppercase = letters[0 .. 26]; /// Uppercase letters: A .. Z (the first half of $(D letters))
immutable lowercase = letters[26 .. 52]; /// Lowercase letters: a .. z (the second half of $(D letters))
immutable whitespace = " \t\v\r\n\f"; /// ASCII _whitespace: space, tab, vertical tab, carriage return, line feed, form feed
80
/++
    Letter case specifier: identifies either upper or lower case letters.
+/
enum LetterCase : bool
{
    upper = false, /// Upper case letters
    lower = true   /// Lower case letters
}
89
///
@safe unittest
{
    import std.conv : to;

    // 42 is 0x2A; the letter case only affects the digits above 9.
    enum value = 42;
    assert(to!string(value, 16, LetterCase.upper) == "2A");
    assert(to!string(value, 16, LetterCase.lower) == "2a");
}

///
@system unittest
{
    import std.digest.hmac : hmac;
    import std.digest.digest : toHexString;
    import std.digest.sha : SHA1;
    import std.string : representation;

    // Compute the HMAC first, then render it as lowercase hexadecimal.
    const digestHex = toHexString!(LetterCase.lower)(
        hmac!SHA1(representation("A very long phrase"), representation("secret")));
    assert(digestHex == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}
112
/// Newline sequence for this system.
version (Windows)
{
    // Carriage return followed by line feed.
    immutable newline = "\r\n";
}
else version (Posix)
{
    // A single line feed.
    immutable newline = "\n";
}
else
{
    static assert(0, "Unsupported OS");
}
120
121
/++
    Checks whether a character is an ASCII letter or decimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in 0 .. 9, A .. Z or a .. z,
             $(D false) otherwise.
+/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    // Nothing outside '0' .. 'z' can be alphanumeric.
    if (c < '0' || c > 'z')
        return false;

    // Digits sit at the bottom of that window, lowercase letters at the top.
    if (c <= '9' || c >= 'a')
        return true;

    // What is left is the middle of the window, where only A .. Z qualifies.
    return c >= 'A' && c <= 'Z';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlphaNum('A'));
    assert( isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: non-ASCII Unicode alphanumerics are not accepted:
    assert(!isAlphaNum('á'));
}

@safe unittest
{
    foreach (accepted; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
        assert(isAlphaNum(accepted));

    foreach (rejected; whitespace)
        assert(!isAlphaNum(rejected));
}
150
151
/++
    Checks whether a character is an ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in A .. Z or a .. z, $(D false) otherwise.
+/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Every ASCII letter lies in 'A' .. 'z'.
    if (c < 'A' || c > 'z')
        return false;

    // Inside that span only the characters strictly between 'Z' and 'a'
    // are not letters.
    return c <= 'Z' || c >= 'a';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: non-ASCII Unicode alphabetic characters are not accepted:
    assert(!isAlpha('á'));
}

@safe unittest
{
    foreach (accepted; chain(letters, lowercase, uppercase))
        assert(isAlpha(accepted));

    foreach (rejected; chain(digits, octalDigits, whitespace))
        assert(!isAlpha(rejected));
}
181
182
/++
    Checks whether a character is a lowercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in a .. z, $(D false) otherwise.
+/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    // Reject anything that falls before 'a' or after 'z'.
    return !(c < 'a' || c > 'z');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: non-ASCII Unicode lowercase letters are not accepted
    assert(!isLower('á'));
    assert(!isLower('Á'));
}

@safe unittest
{
    foreach (accepted; lowercase)
        assert(isLower(accepted));

    foreach (rejected; chain(digits, uppercase, whitespace))
        assert(!isLower(rejected));
}
212
213
/++
    Checks whether a character is an uppercase ASCII letter.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in A .. Z, $(D false) otherwise.
+/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    if (c < 'A')
        return false;
    return c <= 'Z';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: non-ASCII Unicode uppercase letters are not accepted
    assert(!isUpper('á'));
    assert(!isUpper('Á'));
}

@safe unittest
{
    foreach (accepted; uppercase)
        assert(isUpper(accepted));

    foreach (rejected; chain(digits, lowercase, whitespace))
        assert(!isUpper(rejected));
}
243
244
/++
    Checks whether a character is an ASCII decimal digit.

    Params: c = The character to test.
    Returns: Whether $(D c) is a digit (0 .. 9).
+/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    return '0' <= c && c <= '9';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The full-width digits are written as escapes so that they cannot be
    // confused with (or normalized into) their ASCII look-alikes.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}

@safe unittest
{
    foreach (c; digits)
        assert(isDigit(c));

    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}
275
276
/++
    Checks whether a character is an ASCII octal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a digit in base 8 (0 .. 7),
             $(D false) otherwise.
+/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    if (c > '7')
        return false;
    return c >= '0';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isOctalDigit('0'));
    assert( isOctalDigit('7'));
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}

@safe unittest
{
    foreach (accepted; octalDigits)
        assert(isOctalDigit(accepted));

    foreach (rejected; chain(letters, ['8', '9'], whitespace))
        assert(!isOctalDigit(rejected));
}
304
305
/++
    Checks whether a character is an ASCII hexadecimal digit.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a digit in base 16
             (0 .. 9, A .. F, a .. f), $(D false) otherwise.
+/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    // Nothing outside '0' .. 'f' can be a hexadecimal digit.
    if (c < '0' || c > 'f')
        return false;

    // Decimal digits are at the bottom of the window, a .. f at the top.
    if (c <= '9' || c >= 'a')
        return true;

    // In between, only A .. F qualifies.
    return c >= 'A' && c <= 'F';
}

///
@safe pure nothrow @nogc unittest
{
    assert( isHexDigit('0'));
    assert( isHexDigit('A'));
    assert( isHexDigit('f')); // lowercase hex digits are accepted
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}

@safe unittest
{
    foreach (accepted; fullHexDigits)
        assert(isHexDigit(accepted));

    // Letters past the sixth one (g .. z, G .. Z) are not hexadecimal digits.
    foreach (rejected; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
        assert(!isHexDigit(rejected));
}
334
335
/++
    Checks whether a character is ASCII whitespace.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is the space, tab, line feed, vertical tab,
             form feed, or carriage return character; $(D false) otherwise.
+/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        // '\t' .. '\r' are the consecutive code points 0x09 .. 0x0D.
        case ' ', '\t', '\n', '\v', '\f', '\r':
            return true;
        default:
            return false;
    }
}

///
@safe pure nothrow @nogc unittest
{
    assert( isWhite(' '));
    assert( isWhite('\t'));
    assert( isWhite('\n'));
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: non-ASCII Unicode whitespace characters are not accepted.
    static import std.uni;
    assert(std.uni.isWhite('\u00A0'));
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
}

@safe unittest
{
    foreach (accepted; whitespace)
        assert(isWhite(accepted));

    foreach (rejected; chain(digits, letters))
        assert(!isWhite(rejected));
}
371
372
/++
    Checks whether a character is an ASCII control character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is below 0x20 or equal to 0x7F,
             $(D false) otherwise.
+/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    // 0x7F is the one control character that lies above the printable range.
    if (c == 0x7F)
        return true;
    return c < 0x20;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isControl('\0'));
    assert( isControl('\022'));
    assert( isControl('\n')); // newline is both whitespace and control
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}

@safe unittest
{
    foreach (dchar low; 0 .. 32)
        assert(isControl(low));
    assert(isControl(127));

    foreach (rejected; chain(digits, letters, [' ']))
        assert(!isControl(rejected));
}
408
409
/++
    Checks whether a character is an ASCII punctuation character.

    Params: c = The character to test.
    Returns: Whether or not $(D c) is a punctuation character. That includes
    all ASCII characters which are not control characters, letters, digits, or
    whitespace.
+/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // '!' .. '~' excludes the control characters and the space; dropping
    // letters and digits from that range leaves the punctuation.
    return c <= '~' && c >= '!' && !isAlphaNum(c);
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = figure dash)
    assert(!isPunctuation('\u2013')); // (U+2013 = en dash)
}

@safe unittest
{
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || isAlphaNum(c) || c == ' ')
            assert(!isPunctuation(c));
        else
            assert(isPunctuation(c));
    }
}
453
454
/++
    Checks whether a character is a visible (non-space) printable ASCII
    character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a printable character other than the
             space character, $(D false) otherwise.
+/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    // '!' directly follows the space and '~' (0x7E) directly precedes 0x7F,
    // so the graphical characters are those strictly between the two.
    return c > ' ' && c < 0x7F;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isGraphical('1'));
    assert( isGraphical('a'));
    assert( isGraphical('#'));
    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}

@safe unittest
{
    foreach (dchar code; 0 .. 128)
    {
        if (isControl(code) || code == ' ')
            assert(!isGraphical(code));
        else
            assert(isGraphical(code));
    }
}
489
490
/++
    Checks whether a character is a printable ASCII character.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is a printable character, the space
             character included; $(D false) otherwise.
+/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    // Everything from the space up to and including '~' is printable.
    return !(c < ' ' || c > '~');
}

///
@safe pure nothrow @nogc unittest
{
    assert( isPrintable(' '));  // whitespace is printable
    assert( isPrintable('1'));
    assert( isPrintable('a'));
    assert( isPrintable('#'));
    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}

@safe unittest
{
    foreach (dchar code; 0 .. 128)
    {
        if (isControl(code))
            assert(!isPrintable(code));
        else
            assert(isPrintable(code));
    }
}
524
525
/++
    Checks whether a character belongs to the ASCII character set.

    Params: c = The character to test.
    Returns: $(D true) if $(D c) is in the range 0 .. 0x7F,
             $(D false) otherwise.
+/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    // 0x80 is the first code point past the ASCII range.
    return c < 0x80;
}

///
@safe pure nothrow @nogc unittest
{
    assert( isASCII('a'));
    assert(!isASCII('á'));
}

@safe unittest
{
    foreach (dchar code; 0 .. 128)
        assert(isASCII(code));

    assert(!isASCII(128));
}
551
552
/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).

    Returns: The corresponding lowercase letter, if $(D c) is an uppercase
    ASCII character, otherwise $(D c) itself. In the case where $(D C) is a
    built-in type, or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)), whereas if it's a user-defined type (or an
    enum of one), the result is a $(D dchar).
+/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Strip any enum wrapper first, then decide on the result type:
    // aggregates are returned as dchar, everything else as its unqualified self.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    // 'a' - 'A' is the fixed distance between the two ASCII letter cases.
    return isUpper(c) ? cast(R)(cast(R) c + 'a' - 'A') : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toLower('a') == 'a');
    assert(toLower('A') == 'a');
    assert(toLower('#') == '#');

    // N.B.: Non-ASCII Unicode uppercase letters are not converted.
    assert(toLower('Á') == 'Á');
}

@safe pure nothrow unittest
{

    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; uppercase)
            assert(toLower(cast(C) c) == lowercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'A' || c > 'Z')
                assert(toLower(c) == c);
            else
                assert(toLower(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toLower(c) == c);

        // The range above is half-open and stops short of C.max, so the
        // largest value is checked explicitly.
        assert(toLower(C.max) == C.max);

        //CTFE
        static assert(toLower(cast(C)'a') == 'a');
        static assert(toLower(cast(C)'A') == 'a');
    }
}
613
614
/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to $(D dchar).

    Returns: The corresponding uppercase letter, if $(D c) is a lowercase ASCII
    character, otherwise $(D c) itself. In the case where $(D C) is a built-in
    type, or an enum of a built-in type, the result has type
    $(D Unqual!(OriginalType!C)), whereas if it's a user-defined type (or an
    enum of one), the result is a $(D dchar).
+/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Strip any enum wrapper first, then decide on the result type:
    // aggregates are returned as dchar, everything else as its unqualified self.
    alias OC = OriginalType!C;
    static if (isAggregateType!OC)
        alias R = dchar;
    else
        alias R = Unqual!OC;

    // 'a' - 'A' is the fixed distance between the two ASCII letter cases.
    return isLower(c) ? cast(R)(cast(R) c - ('a' - 'A')) : cast(R) c;
}

///
@safe pure nothrow @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('#') == '#');

    // N.B.: Non-ASCII Unicode lowercase letters are not converted.
    assert(toUpper('á') == 'á');
}

@safe pure nothrow unittest
{
    foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
    {
        foreach (i, c; lowercase)
            assert(toUpper(cast(C) c) == uppercase[i]);

        foreach (C c; 0 .. 128)
        {
            if (c < 'a' || c > 'z')
                assert(toUpper(c) == c);
            else
                assert(toUpper(c) != c);
        }

        foreach (C c; 128 .. C.max)
            assert(toUpper(c) == c);

        // The range above is half-open and stops short of C.max, so the
        // largest value is checked explicitly.
        assert(toUpper(C.max) == C.max);

        //CTFE
        static assert(toUpper(cast(C)'a') == 'A');
        static assert(toUpper(cast(C)'A') == 'A');
    }
}
674
675
@safe unittest //Test both toUpper and toLower with non-builtin
{
    // Structs that wrap a character type and convert to it via alias this.
    static struct UDC { char c; alias c this; }
    static struct UDW { wchar c; alias c this; }
    static struct UDD { dchar c; alias c this; }
    // Enums whose base type is a built-in character type.
    enum CE : char {a = 'a', A = 'A'}
    enum WE : wchar {a = 'a', A = 'A'}
    enum DE : dchar {a = 'a', A = 'A'}
    // Enums whose base type is one of the wrapper structs above.
    enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
    enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
    enum UDDE : UDD {a = UDD('a'), A = UDD('A')}

    // Wrapper structs: accepted through their implicit conversion to dchar.
    foreach (Wrapper; AliasSeq!(UDC, UDW, UDD))
    {
        // At run time.
        assert(toLower(Wrapper('a')) == 'a');
        assert(toLower(Wrapper('A')) == 'a');

        // At compile time.
        static assert(toLower(Wrapper('a')) == 'a');
        static assert(toLower(Wrapper('A')) == 'a');
        static assert(toUpper(Wrapper('a')) == 'A');
        static assert(toUpper(Wrapper('A')) == 'A');
    }

    // Every enum flavour, both at run time and at compile time.
    foreach (E; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
    {
        assert(toLower(E.a) == 'a');
        assert(toLower(E.A) == 'a');
        assert(toUpper(E.a) == 'A');
        assert(toUpper(E.A) == 'A');

        static assert(toLower(E.a) == 'a');
        static assert(toLower(E.A) == 'a');
        static assert(toUpper(E.a) == 'A');
        static assert(toUpper(E.A) == 'A');
    }

    // An enum of a built-in type yields that built-in type.
    foreach (BuiltinEnum; AliasSeq!(CE, WE, DE))
    {
        alias Expected = OriginalType!BuiltinEnum;
        static assert(is(typeof(toLower(BuiltinEnum.init)) == Expected));
        static assert(is(typeof(toUpper(BuiltinEnum.init)) == Expected));
    }

    // A wrapper struct, or an enum of one, yields dchar.
    foreach (UserDefined; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
    {
        static assert(is(typeof(toLower(UserDefined.init)) == dchar));
        static assert(is(typeof(toUpper(UserDefined.init)) == dchar));
    }
}
730