1'''
2@author Sergey Chikuyonok (serge.che@gmail.com)
3@link http://chikuyonok.ru
4'''
5from zencoding.parser import css, xml
6import re
7
def is_stop_char(token):
	"""
	Tells whether token is a CSS structural delimiter: {, }, ; or :
	@param token: Token dict with a 'type' key
	@return: bool
	"""
	# substring membership check, matching the original semantics
	stop_chars = '{};:'
	return token['type'] in stop_chars
10
def char_at(text, pos):
	"""
	Returns character at specified index of text.
	If index is out of range, returns empty string
	@param text: str
	@param pos: int
	@return: str
	"""
	# Guard against negative indices: Python would otherwise wrap around
	# (text[-1] is the LAST character), contradicting the documented
	# "out of range -> empty string" contract of JS String.charAt()
	return text[pos] if 0 <= pos < len(text) else ''
17
def calculate_nl_length(content, pos):
	"""
	Calculates newline width at specified position in content
	@param content: str
	@param pos: int
	@return: int
	"""
	# a CRLF pair counts as a two-character newline, anything else as one
	is_crlf = char_at(content, pos) == '\r' and char_at(content, pos + 1) == '\n'
	return 2 if is_crlf else 1
29
def post_process_optimized(optimized, original):
	"""
	Post-process optimized tokens: collapse tokens for complex values
	@param optimized: Optimized tokens
	@type optimized: list
	@param original: Original preprocessed tokens
	@type original: list
	@return: the same <code>optimized</code> list, mutated in place
	"""
	for token in optimized:
		if token['type'] != 'value':
			continue

		children = []
		current = None

		# Walk the original tokens this value spans and merge runs of
		# consecutive non-whitespace tokens into single [start, end] ranges
		for ix in range(token['ref_start_ix'], token['ref_end_ix'] + 1):
			subtoken = original[ix]
			if subtoken['type'] == 'white':
				# whitespace ends the current range, if any
				if current is not None:
					children.append(current)
					current = None
			elif current is None:
				current = [subtoken['start'], subtoken['end']]
			else:
				current[1] = subtoken['end']

		if current is not None: # push last range
			children.append(current)

		token['children'] = children

	return optimized
63
def make_token(type='', value='', pos=0, ix=0):
	"""
	Creates a single optimized-token dict
	@param type: Token type name
	@param value: Token text content
	@param pos: Character index of the token inside the document
	@param ix: Index of the reference token this one starts and ends at
	@return: dict
	"""
	# normalize falsy arguments (None, '') to empty strings
	content = value if value else ''
	token_type = type if type else ''
	return {
		'type': token_type,
		'content': content,
		'start': pos,
		'end': pos + len(content),
		# Reference token index that starts current token
		'ref_start_ix': ix,
		# Reference token index that ends current token
		'ref_end_ix': ix
	}
76
def parse_css(source, offset=0):
	"""
	Parses CSS and optimizes parsed chunks
	@param source: CSS source code fragment
	@type source: str
	@param offset: Offset of CSS fragment inside whole document
	@type offset: int
	@return: list
	"""
	tokens = css.parse(source)
	return optimize_css(tokens, offset, source)
87
def parse_html(tag, offset=0):
	"""
	Parses HTML and optimizes parsed chunks
	@param tag: HTML source code fragment
	@type tag: str
	@param offset: Offset of HTML fragment inside whole document
	@type offset: int
	@return: list
	"""
	tokens = xml.parse(tag)
	result = []
	pos = 0

	# hard cap on iterations protects against a parser that never terminates
	for _ in range(1000):
		try:
			# the parser exposes a dict-based iterator protocol and signals
			# exhaustion either with a falsy token or its own StopIteration
			t = tokens['next']()
		except xml.StopIteration:
			break

		if not t:
			break

		# NOTE(review): token text is taken from 'content' while the offset
		# advances by len of 'value' — presumably these hold the same text;
		# verify against the xml parser's token shape
		result.append(make_token(t['style'], t['content'], offset + pos, 0))
		pos += len(t['value'])

	return result
115
class ExtList(list):
	"""
	A plain list that additionally carries the original
	(pre-optimization) token list in its <code>original</code> attribute.
	"""

	def __init__(self):
		super(ExtList, self).__init__()
		# filled by optimize_css() with the unoptimized token stream
		self.original = []
120
121
def optimize_css(tokens, offset, content):
	"""
	Optimizes parsed CSS tokens: combines selector chunks, complex values
	into a single chunk
	@param tokens: Tokens produced by <code>CSSEX.lex()</code>
	@type tokens: list
	@param offset: CSS rule offset in source code (character index)
	@type offset: int
	@param content: Original CSS source code
	@type content: str
	@return: list of optimized tokens
	"""
	offset = offset or 0
	result = ExtList()
	# _o: width consumed on the current line by the last non-'line' token
	_o = 0
	# i: index of the token currently processed (also read by add_token below)
	i = 0
	# delta: character offset of the current line's start within the fragment
	delta = 0
	in_rules = False    # True while inside a {...} declaration block
	in_value = False    # True between ':' and the terminating ';' or '}'
	# currently-growing combined tokens, one slot per combinable type
	acc_tokens = {
		'selector': None,
		'value': None
	}
	orig_tokens = []
	acc_type = None

	def add_token(token, type):
		# Either extend the active accumulator of the given type or emit a
		# stand-alone token. NOTE: reads i, delta and offset from the
		# enclosing scope, so it always sees their current loop values.
		if type and type in acc_tokens:
			if not acc_tokens[type]:
				# start a new combined token and expose it in the result
				acc_tokens[type] = make_token(type, token['value'], offset + delta + token['charstart'], i)
				result.append(acc_tokens[type])
			else:
				# grow the existing combined token in place
				acc_tokens[type]['content'] += token['value']
				acc_tokens[type]['end'] += len(token['value'])
				acc_tokens[type]['ref_end_ix'] = i
		else:
			result.append(make_token(token['type'], token['value'], offset + delta + token['charstart'], i))

	for i, token in enumerate(tokens):
		token = tokens[i]  # NOTE(review): redundant — enumerate already yields tokens[i]
		acc_type = None

		if token['type'] == 'line':
			# A 'line' token carries no text of its own: advance delta past
			# the previous token, then synthesize the newline characters
			delta += _o
			# fall back to a single '\n' when no source text is available
			nl_size = content and calculate_nl_length(content, delta) or 1
			tok_value = nl_size == 1 and '\n' or '\r\n'

			orig_tokens.append(make_token(token['type'], tok_value, offset + delta))

			result.append(make_token(token['type'], tok_value, offset + delta, i))
			delta += nl_size
			_o = 0

			continue

		orig_tokens.append(make_token(token['type'], token['value'], offset + delta + token['charstart']))

		# use charstart and length because of incorrect charend
		# computation for whitespace
		_o = token['charstart'] + len(token['value'])

		if token['type'] != 'white':
			if token['type'] == '{':
				# entering a declaration block ends the selector accumulator
				in_rules = True
				acc_tokens['selector'] = None
			elif in_rules:
				if token['type'] == ':':
					in_value = True
				elif token['type'] == ';':
					# declaration terminated: close the value accumulator
					in_value = False
					acc_tokens['value'] = None
				elif token['type'] == '}':
					# block terminated: leave the rules context entirely
					in_value = in_rules = False
					acc_tokens['value'] = None
				elif in_value or acc_tokens['value']:
					acc_type = 'value'
			elif acc_tokens['selector'] or (not in_rules and not is_stop_char(token)):
				# start selector token
				acc_type = 'selector'

			add_token(token, acc_type)
		else:
			# whitespace token, decide where it should be
			if i < len(tokens) - 1 and is_stop_char(tokens[i + 1]):
				# drop whitespace that directly precedes a delimiter
				continue

			# keep whitespace inside a growing selector or value chunk
			if acc_tokens['selector'] or acc_tokens['value']:
				add_token(token, acc_tokens['selector'] and 'selector' or 'value')

	result.original = orig_tokens
	return post_process_optimized(result, orig_tokens)
213
def extract_css_rule(content, pos, is_backward=False):
	"""
	Extracts single CSS rule definition from source code
	@param content: CSS source code
	@type content: str
	@param pos: Character position where to start source code extraction
	@type pos: int
	@param is_backward: Look for the rule to the left of <code>pos</code>
	only: do not stop the leftward scan at a closing brace
	@type is_backward: bool
	@return: (start, end) character range covering the selector and the
	braced rule body, or None if no complete rule was found
	"""
	result = ''
	c_len = len(content)
	offset = pos
	brace_pos = -1

	# search left until we find rule edge
	while offset >= 0:
		ch = content[offset]
		if ch == '{':
			brace_pos = offset
			break
		elif ch == '}' and not is_backward:
			offset += 1
			break

		offset -= 1

	# If the leftward scan ran off the start of the string, resume the
	# rightward scan from position 0. Without this clamp, content[-1]
	# would wrap around to the LAST character (unlike JS charAt(-1),
	# which returns '') and could abort the search prematurely.
	offset = max(offset, 0)

	# search right for full rule set
	while offset < c_len:
		ch = content[offset]
		if ch == '{':
			brace_pos = offset
		elif ch == '}':
			if brace_pos != -1:
				result = content[brace_pos:offset + 1]
			break

		offset += 1

	if result:
		# walk left from the opening brace to find where the selector starts
		offset = brace_pos - 1
		while offset >= 0:
			ch = content[offset]
			if ch in '{}/\\<>': break
			offset -= 1

		# also trim leading whitespace from the selector
		re_white = re.compile(r'^[\s\n\r]+', re.MULTILINE)
		selector = re.sub(re_white, '', content[offset + 1:brace_pos])
		return (brace_pos - len(selector), brace_pos + len(result))

	return None
266
# function alias
# Backward-compatible shorthand: callers may refer to make_token() as token()
token = make_token