// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/**
 * Creates a CSV lines parser.
 */
export class CsvParser {
  /**
   * Decodes the backslash escape sequences in a single CSV field.
   *
   * Recognised sequences:
   *   - `\n`      becomes a line feed
   *   - `\\`      becomes a single backslash
   *   - `\xHH`    becomes the UTF-16 code unit with the given 2-digit hex value
   *               (so `\x2C` becomes a comma)
   *   - `\uHHHH`  becomes the UTF-16 code unit with the given 4-digit hex value
   *
   * Hex digits are accepted in either case. A backslash that does not start
   * one of the sequences above (unknown identifier, too few hex digits, or a
   * trailing backslash) is left in the output unchanged rather than being
   * turned into a NUL character.
   *
   * @param {string} string Raw field text.
   * @returns {string} The field with all well-formed escapes decoded.
   */
  escapeField(string) {
    // Fast path: most fields contain no escapes at all.
    if (!string.includes('\\')) return string;

    // Matching left to right guarantees that an escaped backslash consumes
    // both of its characters, so "\\\\n" decodes to "\\n" and not to a
    // backslash followed by a line feed.
    return string.replace(
      /\\(n|\\|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4})/g,
      (sequence, body) => {
        if (body === 'n') return '\n';
        if (body === '\\') return '\\';
        // Drop the leading 'x' / 'u' marker and decode the hex digits.
        return String.fromCharCode(Number.parseInt(body.slice(1), 16));
      },
    );
  }

  /**
   * Parses a line of CSV-encoded values. Returns an array of fields.
   *
   * The line is split on every literal comma first and each field is decoded
   * with `escapeField` afterwards, so an escaped comma (`\x2C`) never acts as
   * a separator.
   *
   * @param {string} line Input line.
   * @returns {string[]} Decoded fields; an empty array for an empty line.
   */
  parseLine(line) {
    // An empty line has no fields (split would otherwise yield ['']).
    if (line.length === 0) return [];
    return line.split(',').map((field) => this.escapeField(field));
  }
}