1'''
2@author Sergey Chikuyonok (serge.che@gmail.com)
3@link http://chikuyonok.ru
4'''
5import re
6
# word at the very start of a string: word characters, '-', ':' and '$'
re_word = re.compile(r'^[\w\-:\$]+')
# single- or double-quoted string at the very start of a string:
# group 1 is the quote character, group 2 is the text between the quotes
# (backslash-escaped characters are allowed inside)
re_attr_string = re.compile(r'^(["\'])((?:(?!\1)[^\\]|\\.)*)\1')
# whole-string check of an element name, optionally followed by a single '+'
re_valid_name = re.compile(r'^[\w\d\-_\$\:@!]+\+?$', re.IGNORECASE)
10
def char_at(text, pos):
	"""
	Returns character at specified index of text.
	If index is out of range (negative, or at/after the end of text),
	returns empty string

	@type text: str
	@type pos: int
	@return: str
	"""
	# negative positions are rejected explicitly: plain indexing would
	# count them from the end of the string or raise IndexError
	return text[pos] if 0 <= pos < len(text) else ''
17
def split_expression(expr):
	"""
	Split expression by node name and its content, if exists. E.g. if we pass
	'a{Text}' expression, it will be split into 'a' and 'Text'.

	Braces located inside an attribute set ('[...]') are not treated as
	text node bounds. If braces are unbalanced, the expression is
	returned as is, without text content.

	@type expr: str
	@return: Result tuple with two elements: node name and its content
	(content is None if expression has no valid text node)
	"""
	# fast test on text node
	if '{' not in expr:
		return expr, None

	attr_lvl = 0
	text_lvl = 0
	brace_stack = [] # positions of currently opened '{'

	for i, ch in enumerate(expr):
		if ch == '[':
			if not text_lvl:
				attr_lvl += 1
		elif ch == ']':
			if not text_lvl:
				attr_lvl -= 1
		elif ch == '{':
			if not attr_lvl:
				text_lvl += 1
				brace_stack.append(i)
		elif ch == '}':
			if not attr_lvl:
				text_lvl -= 1
				if not brace_stack:
					# stray closing brace without matching opening one:
					# nothing to pop, keep scanning
					continue

				brace_start = brace_stack.pop()
				if text_lvl == 0:
					# found braces bounds
					return expr[0:brace_start], expr[brace_start + 1:i]

	# if we are here, then no valid text node found
	return expr, None
59
def parse_attributes(s):
	"""
	Parses tag attributes extracted from abbreviation
	@type s: str
	@return: Tuple of two elements: element name and list of attributes,
	where each attribute is a dict with 'name' and 'value' keys. All
	classes are merged into a single space-separated 'class' attribute

	Example of incoming data:
	#header
	.some.data
	.some.data#header
	[attr]
	#item[attr=Hello other="World"].class
	"""

	result = []
	name = ''
	collect_name = True
	class_name = None
	char_map = {'#': 'id', '.': 'class'}

	# walk char-by-char
	i = 0
	il = len(s)

	while i < il:
		ch = s[i]
		if ch == '#': # id
			# value is a word that goes right after '#'
			val = get_word(i + 1, s)
			result.append({'name': char_map[ch], 'value': val})
			i += len(val) + 1
			collect_name = False
		elif ch == '.': # class
			val = get_word(i + 1, s)
			if class_name is None:
				# remember object pointer for value modification
				class_name = {'name': char_map[ch], 'value': ''}
				result.append(class_name)

			if class_name['value']:
				class_name['value'] += ' '
			class_name['value'] += val
			i += len(val) + 1
			collect_name = False
		elif ch == '[': # begin attribute set
			# search for end of set
			end_ix = s.find(']', i)
			if end_ix == -1:
				# invalid attribute set, stop searching
				break

			result.extend(extract_attributes(s[i + 1:end_ix]))
			# closing bracket itself is skipped on next iteration
			i = end_ix
			collect_name = False
		else:
			# name consists of all characters that precede first
			# '#', '.' or '['
			if collect_name:
				name += ch
			i += 1

	return name, result
119
def get_word(ix, s):
	"""
	Reads a word from 's', beginning at character index 'ix'
	@type ix: int
	@type s: str
	@return: Matched word or empty string if there is no word at 'ix'
	"""
	found = re_word.match(s[ix:])
	if not found:
		return ''

	return found.group(0)
126
def extract_attributes(attr_set):
	"""
	Splits attribute set into separate attributes with their values
	@type attr_set: str
	@return: List of dicts with 'name' and 'value' keys, in the same order
	as they appear in attribute set
	"""
	remaining = attr_set.strip()
	attributes = []
	passes_left = 100 # endless loop protection

	while remaining and passes_left:
		attr_name = get_word(0, remaining)
		if not attr_name:
			# something wrong, can't extract attribute name
			break

		attr = {'name': attr_name, 'value': ''}
		name_len = len(attr_name)

		if char_at(remaining, name_len) != '=':
			# attribute has no value, go to the next one
			remaining = remaining[name_len:].strip()
		else:
			value_pos = name_len + 1
			tail = remaining[value_pos:]
			if char_at(remaining, value_pos) in ('"', "'"):
				# quoted string: value is the text between quotes,
				# the whole match (with quotes) is consumed
				m = re_attr_string.match(tail)
				value_group, consumed_group = 2, 0
			else:
				# unquoted string: value lasts until first whitespace
				m = re.match(r'(.+?)(\s|$)', tail)
				value_group, consumed_group = 1, 1

			if m:
				attr['value'] = m.group(value_group)
				remaining = tail[len(m.group(consumed_group)):].strip()
			else:
				# something wrong, break loop
				remaining = ''

		attributes.append(attr)
		passes_left -= 1

	return attributes
175
def squash(node):
	"""
	Optimizes tree node: replaces empty children with their own children.
	Only one level is processed: if hoisted children are empty too,
	they are left for the next call.

	Hoisted children get 'node' as their new parent.

	@type node: TreeNode
	@return: TreeNode
	"""
	# build a fresh list instead of splicing the list being iterated:
	# splicing shifts positions, so some children would be skipped
	squashed = []
	for child in node.children:
		if child.is_empty():
			for grandchild in child.children:
				grandchild.parent = node
				squashed.append(grandchild)
		else:
			squashed.append(child)

	# update the list in place to keep existing references to it valid
	node.children[:] = squashed
	return node
187
def optimize_tree(node):
	"""
	Removes empty nodes from the whole tree: squashes passed node until
	it has no empty children, then does the same for every child
	@type node: TreeNode
	@return: TreeNode
	"""
	# squashing can bring new empty nodes up to this level,
	# so repeat until none of them is left
	needs_squash = node.has_empty_children()
	while needs_squash:
		squash(node)
		needs_squash = node.has_empty_children()

	for descendant in node.children:
		optimize_tree(descendant)

	return node
200
def parse(abbr):
	"""
	Parses abbreviation into tree with respect of groups,
	text nodes and attributes. Each node of the tree is a single
	abbreviation. Tree represents actual structure of the outputted
	result.

	Operators ('+', '>', '(' and ')') are recognized only outside of
	text nodes ('{...}') and attribute sets ('[...]'); inside of them
	they are treated as plain characters.

	@param abbr: Abbreviation to parse
	@type abbr: str
	@return: TreeNode
	"""
	root = TreeNode()
	context = root.add_child()
	i = 0
	il = len(abbr)
	text_lvl = 0
	attr_lvl = 0
	group_stack = [root]
	# kept in a list so dump_token() can reset the value in place
	token = ['']

	def dump_token():
		# pass collected characters to current context node
		if token[0]:
			context.set_abbreviation(token[0])
		token[0] = ''

	while i < il:
		ch = abbr[i]
		prev_ch = abbr[i - 1] if i else ''
		# operators work only outside of text nodes and attribute sets
		is_top_level = not text_lvl and not attr_lvl

		if ch == '{':
			if not attr_lvl:
				text_lvl += 1
			token[0] += ch
		elif ch == '}':
			if not attr_lvl:
				text_lvl -= 1
			token[0] += ch
		elif ch == '[':
			if not text_lvl:
				attr_lvl += 1
			token[0] += ch
		elif ch == ']':
			if not text_lvl:
				attr_lvl -= 1
			token[0] += ch
		elif ch == '(':
			if is_top_level:
				# beginning of the new group
				dump_token()

				if prev_ch != '+' and prev_ch != '>':
					# previous char is not an operator, assume it's
					# a sibling
					context = context.parent.add_child()

				group_stack.append(context)
				context = context.add_child()
			else:
				token[0] += ch
		elif ch == ')':
			if is_top_level:
				# end of the group, pop stack
				dump_token()
				context = group_stack.pop()

				if char_at(abbr, i + 1) == '*':
					# group multiplication: collect digits that go
					# right after '*'
					group_mul = ''
					for n_ch in abbr[i + 2:]:
						if not n_ch.isdigit():
							break
						group_mul += n_ch

					# skip '*' and multiplier digits
					i += len(group_mul) + 1
					group_mul = int(group_mul or 1)
					# group node is already in the tree once, so add
					# the same node (group_mul - 1) more times
					while 1 < group_mul:
						context.parent.add_child(context)
						group_mul -= 1
			else:
				token[0] += ch
		elif ch == '+': # sibling operator
			# trailing '+' is not an operator, it is kept in token
			if is_top_level and i != il - 1:
				dump_token()
				context = context.parent.add_child()
			else:
				token[0] += ch
		elif ch == '>': # child operator
			if is_top_level:
				dump_token()
				context = context.add_child()
			else:
				token[0] += ch
		else:
			token[0] += ch

		i += 1

	# put the final token
	dump_token()
	return optimize_tree(root)
301
class TreeNode(object):
	"""
	Single node of parsed abbreviation tree
	"""
	# multiplier at the end of abbreviation: '*' optionally followed by digits
	re_multiplier = re.compile(r'\*(\d+)?$')

	def __init__(self, parent=None):
		"""
		@type parent: TreeNode
		"""
		self.abbreviation = ''
		self.parent = parent
		self.children = []
		self.count = 1
		self.name = None
		self.text = None
		self.attributes = []
		self.is_repeating = False
		self.has_implicit_name = False

	def add_child(self, child=None):
		"""
		Adds passed or creates new child
		@type child: TreeNode
		@return: TreeNode
		"""
		if child is None:
			child = TreeNode()
		child.parent = self
		self.children.append(child)
		return child

	def replace(self, node):
		"""
		Replace current node in parent's child list with another node.
		Does nothing if current node has no parent or is not listed
		in parent's children
		@type node: TreeNode
		"""
		if not self.parent:
			return

		siblings = self.parent.children
		if self in siblings:
			siblings[siblings.index(self)] = node
			self.parent = None

	def set_abbreviation(self, abbr):
		"""
		Sets abbreviation that belongs to current node and extracts
		multiplier, name, text and attributes from it
		@type abbr: str
		@raise ZenInvalidAbbreviation: if extracted node name is not valid
		"""
		self.abbreviation = abbr
		m = self.re_multiplier.search(abbr)
		if m:
			multiplier = m.group(1)
			# missing ('*') or zero ('*0') multiplier results in single node
			self.count = int(multiplier or 1) or 1
			self.is_repeating = not multiplier
			# cut multiplier off the abbreviation
			abbr = abbr[:m.start()]

		if abbr:
			name, self.text = split_expression(abbr)

			if name:
				self.name, self.attributes = parse_attributes(name)
				if not self.name:
					# only attributes are given, e.g. '.class' or '#id'
					self.name = 'div'
					self.has_implicit_name = True

		# validate name
		if self.name and not re_valid_name.match(self.name):
			raise ZenInvalidAbbreviation(self.name)

	def get_abbreviation(self):
		"""
		Returns abbreviation that was set for current node
		@return: str
		"""
		return self.abbreviation

	def to_string(self, level=0):
		"""
		Dump current tree node into a formatted string
		@param level: Nesting level, defines amount of leading dashes
		@type level: int
		@return: str
		"""
		output = '(empty)'
		if self.abbreviation:
			output = self.name or ''

			if self.text is not None:
				output += (' ' if output else '') + '{text: "' + self.text + '"}'

			if self.attributes:
				output += ' [' + ', '.join(['%s="%s"' % (a['name'], a['value']) for a in self.attributes]) + ']'

		lines = [('-' * level) + output + '\n']
		lines.extend(child.to_string(level + 1) for child in self.children)
		return ''.join(lines)

	def __repr__(self):
		return self.to_string()

	def has_empty_children(self):
		"""
		Check if current node contains children with empty
		<code>abbreviation</code> property
		"""
		return any(child.is_empty() for child in self.children)

	def is_empty(self):
		"""
		Check if current node has no abbreviation
		"""
		return not self.abbreviation

	def is_text_node(self):
		"""
		Check if current node is a text-only node
		@return: False if node has a name, otherwise node text
		(so the result is falsy for missing or empty text)
		"""
		return not self.name and self.text
411
412
class ZenInvalidAbbreviation(Exception):
	"""
	Error that signals about invalid abbreviation
	@since: 0.7
	"""
	def __init__(self, value):
		# remember the value to show it in error message
		self.value = value

	def __str__(self):
		return '%r' % (self.value,)