1 /*
2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.nashorn.internal.parser;
27 
28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
29 import static java.lang.Character.LOWERCASE_LETTER;
30 import static java.lang.Character.OTHER_PUNCTUATION;
31 import static java.lang.Character.SPACE_SEPARATOR;
32 import static java.lang.Character.UPPERCASE_LETTER;
33 
34 import java.util.HashMap;
35 import java.util.Locale;
36 
/**
 * JavaScript date parser. This class first tries to parse a date string
 * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
 * If that fails, it falls back to legacy mode in which it accepts a range
 * of different formats.
 *
 * <p>The parser keeps its scan position and the partially filled field array
 * as instance state, and the legacy parser deliberately continues from the
 * point where the strict parser gave up.</p>
 *
 * <p>This class is neither thread-safe nor reusable. Calling the
 * {@code parse()} method more than once will yield undefined results.</p>
 */
public class DateParser {

    /** Constant for index position of parsed year value. */
    public static final int YEAR        = 0;
    /** Constant for index position of parsed month value. */
    public static final int MONTH       = 1;
    /** Constant for index position of parsed day value. */
    public static final int DAY         = 2;
    /** Constant for index position of parsed hour value. */
    public static final int HOUR        = 3;
    /** Constant for index position of parsed minute value. */
    public static final int MINUTE      = 4;
    /** Constant for index position of parsed second value. */
    public static final int SECOND      = 5;
    /** Constant for index position of parsed millisecond value. */
    public static final int MILLISECOND = 6;
    /** Constant for index position of parsed time zone offset value. */
    public static final int TIMEZONE    = 7;

    /** Lexical token kinds produced by {@link #next()}. */
    private enum Token {
        UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
    }

    /** The input being parsed. */
    private final String string;
    /** Cached length of {@link #string}. */
    private final int length;
    /** Parsed values indexed by the {@code YEAR}..{@code TIMEZONE} constants; {@code null} means "not set". */
    private final Integer[] fields;
    /** Index of the next character to be scanned. */
    private int pos = 0;
    /** Most recently scanned token, or {@code null} if a delimiter was just consumed. */
    private Token token;
    /** Number of characters consumed by the most recent token. */
    private int tokenLength;
    /** Value of the most recent NAME token; {@code null} if the name is not recognized. */
    private Name nameValue;
    /** Value of the most recent NUMBER token, or +1/-1 for a SIGN token. */
    private int numValue;
    /** Field the strict (ECMA) parser expects to fill next. */
    private int currentField = YEAR;
    /** Sign (+1/-1) of an ECMA extended year, or 0 if no sign was seen. */
    private int yearSign = 0;
    /** True if the month was given by name rather than by number (affects d-m-y vs. m-d-y swapping). */
    private boolean namedMonth = false;

    /** Known names, keyed by their first (up to) three lower-case characters. */
    private static final HashMap<String,Name> names = new HashMap<>();

    static {
        addName("monday", Name.DAY_OF_WEEK, 0);
        addName("tuesday", Name.DAY_OF_WEEK, 0);
        addName("wednesday", Name.DAY_OF_WEEK, 0);
        addName("thursday", Name.DAY_OF_WEEK, 0);
        addName("friday", Name.DAY_OF_WEEK, 0);
        addName("saturday", Name.DAY_OF_WEEK, 0);
        addName("sunday", Name.DAY_OF_WEEK, 0);
        addName("january", Name.MONTH_NAME, 1);
        addName("february", Name.MONTH_NAME, 2);
        addName("march", Name.MONTH_NAME, 3);
        addName("april", Name.MONTH_NAME, 4);
        addName("may", Name.MONTH_NAME, 5);
        addName("june", Name.MONTH_NAME, 6);
        addName("july", Name.MONTH_NAME, 7);
        addName("august", Name.MONTH_NAME, 8);
        addName("september", Name.MONTH_NAME, 9);
        addName("october", Name.MONTH_NAME, 10);
        addName("november", Name.MONTH_NAME, 11);
        addName("december", Name.MONTH_NAME, 12);
        // AM/PM values are the number of hours added to the 12-hour clock value
        addName("am", Name.AM_PM, 0);
        addName("pm", Name.AM_PM, 12);
        // time zone values are offsets from UTC in minutes
        addName("z", Name.TIMEZONE_ID, 0);
        addName("gmt", Name.TIMEZONE_ID, 0);
        addName("ut", Name.TIMEZONE_ID, 0);
        addName("utc", Name.TIMEZONE_ID, 0);
        addName("est", Name.TIMEZONE_ID, -5 * 60);
        addName("edt", Name.TIMEZONE_ID, -4 * 60);
        addName("cst", Name.TIMEZONE_ID, -6 * 60);
        addName("cdt", Name.TIMEZONE_ID, -5 * 60);
        addName("mst", Name.TIMEZONE_ID, -7 * 60);
        addName("mdt", Name.TIMEZONE_ID, -6 * 60);
        addName("pst", Name.TIMEZONE_ID, -8 * 60);
        addName("pdt", Name.TIMEZONE_ID, -7 * 60);
        addName("t", Name.TIME_SEPARATOR, 0);
    }

    /**
     * Construct a new {@code DateParser} instance for parsing the given string.
     * @param string the string to be parsed; must not be {@code null}
     */
    public DateParser(final String string) {
        this.string = string;
        this.length = string.length();
        this.fields = new Integer[TIMEZONE + 1];
    }

    /**
     * Try parsing the given string as date according to the extended ISO 8601 format
     * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
     * This method returns {@code true} if the string could be parsed.
     * @return true if the string could be parsed as date
     */
    public boolean parse() {
        return parseEcmaDate() || parseLegacyDate();
    }

    /**
     * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
     * The date string must conform to the following format:
     *
     * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
     *
     * <p>If the string does not contain a time zone offset, the {@code TIMEZONE} field
     * is set to {@code 0} (GMT).</p>
     * @return true if string represents a valid ES5 date string.
     */
    public boolean parseEcmaDate() {

        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (currentField == YEAR && yearSign != 0) {
                        // 15.9.1.15.1 Extended year must have six digits
                        if (tokenLength != 6) {
                            return false;
                        }
                        numValue *= yearSign;
                    } else if (!checkEcmaField(currentField, numValue)) {
                        return false;
                    }
                    // the delimiter is checked before the field is stored so that on failure
                    // the legacy parser can re-process the current number token itself
                    if (!skipEcmaDelimiter()) {
                        return false;
                    }
                    if (currentField < TIMEZONE) {
                        set(currentField++, numValue);
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.TIME_SEPARATOR:
                            // 'T' must follow at least a year and must not appear inside the time part
                            if (currentField == YEAR || currentField > HOUR) {
                                return false;
                            }
                            currentField = HOUR;
                            break;
                        case Name.TIMEZONE_ID:
                            // 'Z' is the only time zone name accepted in strict mode
                            if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        default:
                            return false;
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (currentField == YEAR) {
                        yearSign = numValue;
                    } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
                        // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(true);
    }

    /**
     * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
     *
     * <p>Numbers separated by {@code ':'} are treated as time values, optionally followed by a
     * millisecond value separated by {@code '.'}. Other number values are treated as date values.
     * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
     *
     * <p>English month names and selected time zone names as well as AM/PM markers are recognized
     * and handled properly. Additionally, numeric time zone offsets such as {@code (+|-)hh:mm} or
     * {@code (+|-)hhmm} are recognized. If the string does not contain a time zone offset
     * the {@code TIMEZONE} field is left undefined, meaning the local time zone should be applied.</p>
     *
     * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
     * All other text causes parsing to fail.</p>
     *
     * @return true if the string could be parsed
     */
    public boolean parseLegacyDate() {

        if (yearSign != 0 || currentField > DAY) {
            // we don't support signed years in legacy mode, and we don't take over
            // if the strict parser already advanced into the time part
            return false;
        }
        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (skipDelimiter(':')) {
                        // A number followed by ':' is parsed as time
                        if (!setTimeField(numValue)) {
                            return false;
                        }
                        // consume remaining time tokens
                        do {
                            token = next();
                            if (token != Token.NUMBER || !setTimeField(numValue)) {
                                return false;
                            }
                            // once seconds are set the only remaining separator is '.' (milliseconds)
                        } while (skipDelimiter(isSet(SECOND) ? '.' : ':'));

                    } else {
                        // Parse as date token
                        if (!setDateField(numValue)) {
                            return false;
                        }
                        skipDelimiter('-');
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.AM_PM:
                            if (!setAmPm(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.MONTH_NAME:
                            if (!setMonth(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.TIMEZONE_ID:
                            if (!setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        case Name.TIME_SEPARATOR:
                            return false;
                        default:
                            // weekday names are recognized but ignored
                            break;
                    }
                    if (nameValue.type != Name.TIMEZONE_ID) {
                        // after a time zone name a '-' is kept so that it is read as an offset sign
                        skipDelimiter('-');
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (!setTimezone(readTimeZoneOffset(), true)) {
                        return false;
                    }
                    break;

                case PARENTHESIS:
                    if (!skipParentheses()) {
                        return false;
                    }
                    break;

                case SEPARATOR:
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(false);
    }

    /**
     * Get the parsed date and time fields as an array of {@code Integers}.
     *
     * <p>If parsing was successful, all fields are guaranteed to be set except for the
     * {@code TIMEZONE} field which may be {@code null}, meaning that local time zone
     * offset should be applied.</p>
     *
     * <p>The returned array is the parser's internal array, not a copy.</p>
     *
     * @return the parsed date fields
     */
    public Integer[] getDateFields() {
        return fields;
    }

    /** Returns true if the given field already has a value. */
    private boolean isSet(final int field) {
        return fields[field] != null;
    }

    /** Returns the value of the given field, or {@code null} if it is not set. */
    private Integer get(final int field) {
        return fields[field];
    }

    /** Stores a value for the given field. */
    private void set(final int field, final int value) {
        fields[field] = value;
    }

    /** Returns the character at the current position without consuming it, or -1 at end of input. */
    private int peek() {
        return pos < length ? string.charAt(pos) : -1;
    }

    // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
    private boolean skipNumberDelimiter(final char c) {
        if (pos < length - 1 && string.charAt(pos) == c
                && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /** Consumes the character at the current position if it equals {@code c}; returns whether it did. */
    private boolean skipDelimiter(final char c) {
        if (pos < length && string.charAt(pos) == c) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /**
     * Scans the next token starting at the current position, updating {@code tokenLength}
     * and, depending on the token kind, {@code numValue} or {@code nameValue}.
     * @return the kind of token that was scanned, or {@code Token.END} at end of input
     */
    private Token next() {
        if (pos >= length) {
            tokenLength = 0;
            return Token.END;
        }

        final char c = string.charAt(pos);

        if (c > 0x80) {
            tokenLength = 1;
            pos++;
            return Token.UNKNOWN; // We only deal with ASCII here
        }

        final int type = Character.getType(c);
        switch (type) {
            case DECIMAL_DIGIT_NUMBER:
                // six digits is the longest number we accept (ECMA extended year)
                numValue = readNumber(6);
                return Token.NUMBER;
            case SPACE_SEPARATOR :
            case OTHER_PUNCTUATION:
                tokenLength = 1;
                pos++;
                return Token.SEPARATOR;
            case UPPERCASE_LETTER:
            case LOWERCASE_LETTER:
                nameValue = readName();
                return Token.NAME;
            default:
                tokenLength = 1;
                pos++;
                switch (c) {
                    case '(':
                        return Token.PARENTHESIS;
                    case '-':
                    case '+':
                        numValue = c == '-' ? -1 : 1;
                        return Token.SIGN;
                    default:
                        return Token.UNKNOWN;
                }
        }
    }

    /** Range-checks a time field value in legacy mode; date fields are not validated here. */
    private static boolean checkLegacyField(final int field, final int value) {
        switch (field) {
            case HOUR:
                return isHour(value);
            case MINUTE:
            case SECOND:
                return isMinuteOrSecond(value);
            case MILLISECOND:
                return isMillisecond(value);
            default:
                // skip validation on other legacy fields as we don't know what's what
                return true;
        }
    }

    /** Checks digit count (of the current token) and value range of a field in strict mode. */
    private boolean checkEcmaField(final int field, final int value) {
        switch (field) {
            case YEAR:
                return tokenLength == 4;
            case MONTH:
                return tokenLength == 2 && isMonth(value);
            case DAY:
                return tokenLength == 2 && isDay(value);
            case HOUR:
                return tokenLength == 2 && isHour(value);
            case MINUTE:
            case SECOND:
                return tokenLength == 2 && isMinuteOrSecond(value);
            case MILLISECOND:
                // we allow millisecond to be less than 3 digits
                return tokenLength < 4 && isMillisecond(value);
            default:
                return true;
        }
    }

    /**
     * Checks that the number just read for {@code currentField} is followed by something
     * the strict format allows, consuming the delimiter where one is expected.
     */
    private boolean skipEcmaDelimiter() {
        switch (currentField) {
            case YEAR:
            case MONTH:
                return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
            case DAY:
                return peek() == 'T' || peek() == -1;
            case HOUR:
            case MINUTE:
                return skipNumberDelimiter(':') || endOfTime();
            case SECOND:
                return skipNumberDelimiter('.') || endOfTime();
            default:
                return true;
        }
    }

    /** Returns true if the next character may legally follow the time part in strict mode. */
    private boolean endOfTime() {
        final int c = peek();
        return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
    }

    private static boolean isAsciiLetter(final char ch) {
        return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
    }

    private static boolean isAsciiDigit(final char ch) {
        return '0' <= ch && ch <= '9';
    }

    /**
     * Reads up to {@code maxDigits} ASCII digits starting at the current position and
     * records the number of digits consumed in {@code tokenLength}.
     * @return the decimal value of the digits read, or 0 if there were none
     */
    private int readNumber(final int maxDigits) {
        final int start = pos;
        int n = 0;
        final int max = Math.min(length, pos + maxDigits);
        while (pos < max && isAsciiDigit(string.charAt(pos))) {
            n = n * 10 + string.charAt(pos++) - '0';
        }
        tokenLength = pos - start;
        return n;
    }

    /**
     * Reads a run of ASCII letters and looks it up in the table of known names.
     * @return the matching name, or {@code null} if the letters are neither a known
     *         name nor a prefix (of at least the key length) of one
     */
    private Name readName() {
        final int start = pos;
        final int limit = Math.min(pos + 3, length);

        // first read up to the key length
        while (pos < limit && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }
        final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
        final Name name = names.get(key);
        // then advance to end of name
        while (pos < length && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }

        tokenLength = pos - start;
        // make sure we have the full name or a prefix
        if (name != null && name.matches(string, start, tokenLength)) {
            return name;
        }
        return null;
    }

    /**
     * Reads a numeric time zone offset of the form {@code hh[:]mm} following a sign
     * character that has just been consumed.
     * @return the signed offset in minutes
     */
    private int readTimeZoneOffset() {
        final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
        int offset = readNumber(2);
        skipDelimiter(':');
        offset = offset * 60 + readNumber(2);
        return sign * offset;
    }

    /**
     * Skips text up to and including the parenthesis matching an already consumed
     * {@code '('}, honoring nesting. Stops at end of input if the parenthesis is never closed.
     * @return always {@code true}
     */
    private boolean skipParentheses() {
        int parenCount = 1;
        while (pos < length && parenCount != 0) {
            final char c = string.charAt(pos++);
            if (c == '(') {
                parenCount++;
            } else if (c == ')') {
                parenCount--;
            }
        }
        return true;
    }

    /** Returns the value used for a field that was not present in the input. */
    private static int getDefaultValue(final int field) {
        switch (field) {
            case MONTH:
            case DAY:
                return 1;
            default:
                return 0;
        }
    }

    private static boolean isDay(final int n) {
        return 1 <= n && n <= 31;
    }

    private static boolean isMonth(final int n) {
        return 1 <= n && n <= 12;
    }

    private static boolean isHour(final int n) {
        return 0 <= n && n <= 24;
    }

    private static boolean isMinuteOrSecond(final int n) {
        return 0 <= n && n < 60;
    }

    private static boolean isMillisecond(final int n) {
        return 0 <= n && n < 1000;
    }

    /** Sets the month from a month name; fails if a month value is already set. */
    private boolean setMonth(final int m) {
        if (!isSet(MONTH)) {
            namedMonth = true;
            set(MONTH, m);
            return true;
        }
        return false;
    }

    /** Stores {@code n} in the first unset date field (year, month, day); fails if all are set. */
    private boolean setDateField(final int n) {
        for (int field = YEAR; field != HOUR; field++) {
            if (!isSet(field)) {
                // no validation on legacy date fields
                set(field, n);
                return true;
            }
        }
        return false;
    }

    /**
     * Stores {@code n} in the first unset time field (hour to millisecond); fails if
     * the value is out of range for that field or if all time fields are set.
     */
    private boolean setTimeField(final int n) {
        for (int field = HOUR; field != TIMEZONE; field++) {
            if (!isSet(field)) {
                if (checkLegacyField(field, n)) {
                    set(field, n);
                    return true;
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Sets the time zone offset in minutes. A numeric offset may override a previously
     * set offset of zero (as in {@code "GMT+0100"}); any other repeated setting fails.
     */
    private boolean setTimezone(final int offset, final boolean asNumericOffset) {
        if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
            set(TIMEZONE, offset);
            return true;
        }
        return false;
    }

    /**
     * Applies an AM/PM marker to the already parsed hour.
     *
     * <p>Hours from 0 to 12 are interpreted on the 12-hour clock: 12 AM is hour 0 and
     * 12 PM is hour 12, so the hour is reduced modulo 12 before the offset is added.
     * Hours greater than 12 are left unchanged.</p>
     *
     * @param offset 0 for AM, 12 for PM
     * @return false if no hour has been parsed yet, true otherwise
     */
    private boolean setAmPm(final int offset) {
        if (!isSet(HOUR)) {
            return false;
        }
        final int hour = get(HOUR);
        if (hour >= 0 && hour <= 12) {
            // "12" wraps to 0 so that 12 AM -> 0 and 12 PM -> 12 rather than 12 and 24
            set(HOUR, hour % 12 + offset);
        }
        return true;
    }

    /**
     * Validates the parsed fields, fills in defaults for missing ones and normalizes
     * the result (month becomes 0-based).
     * @param strict true for ES5 15.9.1.15 parsing, false for legacy parsing
     * @return true if the fields describe an acceptable date
     */
    private boolean patchResult(final boolean strict) {
        // sanity checks - make sure we have something
        if (!isSet(YEAR) && !isSet(HOUR)) {
            return false;
        }
        if (isSet(HOUR) && !isSet(MINUTE)) {
            return false;
        }
        // fill in default values for unset fields except timezone
        for (int field = YEAR; field <= TIMEZONE; field++) {
            if (get(field) == null) {
                if (field == TIMEZONE && !strict) {
                    // We only use UTC as default timezone for dates parsed complying with
                    // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
                    // and local timezone is used.
                    continue;
                }
                final int value = getDefaultValue(field);
                set(field, value);
            }
        }

        if (!strict) {
            // swap year, month, and day if it looks like the right thing to do
            if (isDay(get(YEAR))) {
                final int d = get(YEAR);
                set(YEAR, get(DAY));
                if (namedMonth) {
                    // d-m-y
                    set(DAY, d);
                } else {
                    // m-d-y
                    final int d2 = get(MONTH);
                    set(MONTH, d);
                    set(DAY, d2);
                }
            }
            // sanity checks now that we know what's what
            if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
                return false;
            }

            // add 1900 or 2000 to year if it's between 0 and 100
            final int year = get(YEAR);
            if (year >= 0 && year < 100) {
                set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
            }
        } else {
            // 24 hour value is only allowed if all other time values are zero
            if (get(HOUR) == 24 &&
                    (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
                return false;
            }
        }

        // set month to 0-based
        set(MONTH, get(MONTH) - 1);
        return true;
    }

    /** Registers a name in the lookup table under its (up to) three character key. */
    private static void addName(final String str, final int type, final int value) {
        final Name name = new Name(str, type, value);
        names.put(name.key, name);
    }

    /** A recognized word (weekday, month, AM/PM marker, time zone or time separator). */
    private static final class Name {
        /** Full lower-case name. */
        final String name;
        /** First (up to) three characters of the name, used as lookup key. */
        final String key;
        /** Type-dependent value: month number, hour offset or time zone offset in minutes. */
        final int value;
        /** One of the type constants below. */
        final int type;

        static final int DAY_OF_WEEK    = -1;
        static final int MONTH_NAME     = 0;
        static final int AM_PM          = 1;
        static final int TIMEZONE_ID    = 2;
        static final int TIME_SEPARATOR = 3;

        Name(final String name, final int type, final int value) {
            assert name != null;
            assert name.equals(name.toLowerCase(Locale.ENGLISH));

            this.name = name;
            // use first three characters as lookup key
            this.key = name.substring(0, Math.min(3, name.length()));
            this.type = type;
            this.value = value;
        }

        /**
         * Returns true if the {@code len} characters of {@code str} starting at
         * {@code offset} equal the beginning of this name, ignoring case. This is
         * false whenever {@code len} exceeds the length of this name.
         */
        public boolean matches(final String str, final int offset, final int len) {
            return name.regionMatches(true, 0, str, offset, len);
        }

        @Override
        public String toString() {
            return name;
        }
    }

}
728