1// Copyright 2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28
/**
 * Parser for single lines of comma-separated values whose fields may contain
 * backslash escape sequences.
 */
export class CsvParser {
  /**
   * Decodes the backslash escape sequences in a single field. Despite its
   * name, this method removes escaping rather than adding it.
   *
   * Recognized sequences:
   *   \n      -> newline
   *   \\      -> a single backslash
   *   \xHH    -> the character whose code is the two hex digits HH
   *   \uHHHH  -> the UTF-16 code unit given by the four hex digits HHHH
   *
   * Any other backslash (unknown identifier, truncated or non-hex digits, or
   * a trailing backslash) is not an escape sequence and is kept verbatim
   * instead of being decoded into a bogus character.
   *
   * @param {string} string Field text, possibly containing escape sequences.
   * @returns {string} The field with every recognized escape decoded.
   */
  escapeField(string) {
    // Fast path: a field without any backslash needs no decoding.
    if (string.indexOf('\\') === -1) return string;

    // Matches are found left to right and never overlap, so in "\\n" the
    // leading "\\" is consumed as an escaped backslash and the "n" stays text.
    return string.replace(
        /\\(?:([n\\])|x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/g,
        (match, simple, hexByte, hexUnit) => {
          if (simple === 'n') return '\n';
          if (simple === '\\') return '\\';
          // Exactly one of the two hex groups matched; the other is undefined.
          return String.fromCharCode(parseInt(hexByte || hexUnit, 16));
        });
  }

  /**
   * Parses a line of CSV-encoded values. Every comma is treated as a field
   * separator, so a literal comma inside a field has to be written as an
   * escape sequence (\x2C). Each field is decoded with escapeField().
   *
   * @param {string} line Input line.
   * @returns {string[]} The decoded fields; an empty array for an empty line.
   */
  parseLine(line) {
    // An empty line has no fields at all, whereas split() would yield [''].
    if (line.length === 0) return [];
    return line.split(',').map((field) => this.escapeField(field));
  }
}
110