1# This Source Code Form is subject to the terms of the Mozilla Public 2# License, v. 2.0. If a copy of the MPL was not distributed with this 3# file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 5from __future__ import absolute_import 6from __future__ import unicode_literals 7import re 8 9from .base import ( 10 Entity, OffsetComment, Whitespace, 11 Parser 12) 13from six import unichr 14 15 16class PropertiesEntityMixin(object): 17 escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|' 18 '(?P<nl>\n[ \t]*)|(?P<single>.))', re.M) 19 known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'} 20 21 @property 22 def val(self): 23 def unescape(m): 24 found = m.groupdict() 25 if found['uni']: 26 return unichr(int(found['uni'][1:], 16)) 27 if found['nl']: 28 return '' 29 return self.known_escapes.get(found['single'], found['single']) 30 31 return self.escape.sub(unescape, self.raw_val) 32 33 34class PropertiesEntity(PropertiesEntityMixin, Entity): 35 pass 36 37 38class PropertiesParser(Parser): 39 40 Comment = OffsetComment 41 42 def __init__(self): 43 self.reKey = re.compile( 44 '(?P<key>[^#! \t\r\n][^=:\n]*?)[ \t]*[:=][ \t]*', re.M) 45 self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M) 46 self._escapedEnd = re.compile(r'\\+$') 47 self._trailingWS = re.compile(r'[ \t\r\n]*(?:\n|\Z)', re.M) 48 Parser.__init__(self) 49 50 def getNext(self, ctx, offset): 51 junk_offset = offset 52 # overwritten to parse values line by line 53 contents = ctx.contents 54 55 m = self.reComment.match(contents, offset) 56 if m: 57 current_comment = self.Comment(ctx, m.span()) 58 if offset == 0 and 'License' in current_comment.val: 59 # Heuristic. A early comment with "License" is probably 60 # a license header, and should be standalone. 61 return current_comment 62 offset = m.end() 63 else: 64 current_comment = None 65 66 m = self.reWhitespace.match(contents, offset) 67 if m: 68 white_space = Whitespace(ctx, m.span()) 69 offset = m.end() 70 if ( 71 current_comment is not None 72 and white_space.raw_val.count('\n') > 1 73 ): 74 # standalone comment 75 return current_comment 76 if current_comment is None: 77 return white_space 78 else: 79 white_space = None 80 81 m = self.reKey.match(contents, offset) 82 if m: 83 startline = offset = m.end() 84 while True: 85 endval = nextline = contents.find('\n', offset) 86 if nextline == -1: 87 endval = offset = len(contents) 88 break 89 # is newline escaped? 90 _e = self._escapedEnd.search(contents, offset, nextline) 91 offset = nextline + 1 92 if _e is None: 93 break 94 # backslashes at end of line, if 2*n, not escaped 95 if len(_e.group()) % 2 == 0: 96 break 97 startline = offset 98 99 # strip trailing whitespace 100 ws = self._trailingWS.search(contents, startline) 101 if ws: 102 endval = ws.start() 103 104 entity = PropertiesEntity( 105 ctx, current_comment, white_space, 106 (m.start(), endval), # full span 107 m.span('key'), 108 (m.end(), endval)) # value span 109 return entity 110 111 if current_comment is not None: 112 return current_comment 113 if white_space is not None: 114 return white_space 115 116 return self.getJunk(ctx, junk_offset, self.reKey, self.reComment) 117