1# -*- coding: UTF-8 -*-
2import os
3import sys
4import unittest
5
6from lxml import etree
7from xmldiff import formatting, main, actions
8
9from .testing import generate_filebased_cases
10
# Opening fragment shared by the XMLFormatter expectations: the root element
# with the diff namespace declared, followed by an unterminated ``<node``
# start tag, so each test can append its own attributes and content.
START = (
    u'<document xmlns:diff="http://namespaces.shoobx.com/diff">'
    u'<node'
)
# Closing fragment that pairs with START.
END = u'</node>' u'</document>'
13
14
class PlaceholderMakerTests(unittest.TestCase):
    """Tests for formatting.PlaceholderMaker.

    The expected placeholder characters depend on the order in which the
    placeholders are requested, so the statements in each test must keep
    their order.
    """

    def test_get_placeholder(self):
        replacer = formatting.PlaceholderMaker()
        # Get a placeholder:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_OPEN, None)
        self.assertEqual(ph, u'\ue005')
        # Do it again, with a new but equal element and the same type;
        # the same placeholder is expected back:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_OPEN, None)
        self.assertEqual(ph, u'\ue005')
        # Get another one, this time of the closing type, passing the
        # previous placeholder as the third argument:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_CLOSE, ph)
        self.assertEqual(ph, u'\ue006')

    def test_do_element(self):
        # 'p' is the only text tag and 'b' the only formatting tag.
        replacer = formatting.PlaceholderMaker(['p'], ['b'])

        # Formatting tags get replaced, and the content remains
        text = u'<p>This is a tag with <b>formatted</b> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        self.assertEqual(
            etree.tounicode(element),
            u'<p>This is a tag with \ue006formatted\ue005 text.</p>')

        # Undoing the replacement restores the original markup.
        replacer.undo_element(element)
        self.assertEqual(etree.tounicode(element), text)

        # Non formatting tags get replaced with content
        text = u'<p>This is a tag with <foo>formatted</foo> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)
        result = etree.tounicode(element)
        self.assertEqual(
            result,
            u'<p>This is a tag with \ue007 text.</p>')

        # Single formatting tags still get two placeholders.
        text = u'<p>This is a <b/> with <foo/> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)
        result = etree.tounicode(element)
        self.assertEqual(
            result,
            u'<p>This is a \ue009\ue008 with \ue00a text.</p>')

    def test_do_undo_element(self):
        replacer = formatting.PlaceholderMaker(['p'], ['b'])

        # Formatting tags get replaced, and the content remains
        text = u'<p>This <is/> a <f>tag</f> with <b>formatted</b> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        # After the replacement everything lives in the element's text.
        self.assertEqual(
            element.text,
            u'This \ue005 a \ue006 with \ue008formatted'
            u'\ue007 text.')

        # The round trip gives back the original serialization.
        replacer.undo_element(element)
        result = etree.tounicode(element)
        self.assertEqual(result, text)

    def test_do_undo_element_double_format(self):
        # Two formatting tags this time, nested inside each other.
        replacer = formatting.PlaceholderMaker(['p'], ['b', 'u'])

        # Formatting tags get replaced, and the content remains
        text = u'<p>This is <u>doubly <b>formatted</b></u> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        self.assertEqual(
            element.text,
            u'This is \ue006doubly \ue008formatted\ue007'
            u'\ue005 text.')

        replacer.undo_element(element)
        result = etree.tounicode(element)
        self.assertEqual(result, text)

    def test_rml_bug(self):
        # Register the diff prefix so the serialized result uses ``diff:``
        # for attributes in the diff namespace.
        etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS)
        before_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <ref>4</ref>.
      <u><b>At Will Employment</b></u>
      .\u201cText\u201d
    </para>
  </section>
</document>"""
        tree = etree.fromstring(before_diff)
        replacer = formatting.PlaceholderMaker(
            text_tags=('para',), formatting_tags=('b', 'u', 'i',))
        replacer.do_tree(tree)
        # Hand-written stand-in for the document after the diff formatting
        # has run on the placeholder version of the tree.
        after_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert>\ue005</insert>.
      \ue007\ue009At Will Employment\ue008\ue006
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""

        # The diff formatting will find some text to insert.
        # Mark the elements behind placeholders \ue006 and \ue007 with a
        # delete-format attribute before undoing the replacement.
        delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS
        replacer.placeholder2tag[u'\ue006'
                                 ].element.attrib[delete_attrib] = ''
        replacer.placeholder2tag[u'\ue007'
                                 ].element.attrib[delete_attrib] = ''
        tree = etree.fromstring(after_diff)
        replacer.undo_tree(tree)
        result = etree.tounicode(tree)
        expected = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert><ref>4</ref></insert>.
      <u diff:delete-format=""><b>At Will Employment</b></u>
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""
        self.assertEqual(result, expected)

    def test_placeholder_overflow(self):
        # PY3: This whole test is Python 2 support.
        # Test what happens when we have more than 6400 placeholders,
        # by patching the placeholder:

        # Read the original value before entering the try block. If this
        # read were inside it and failed, the finally clause would raise
        # an unbound-local error that hides the real problem.
        orig_start = formatting.PLACEHOLDER_START
        try:
            # This is the last character of the Private use area
            formatting.PLACEHOLDER_START = 0xF8FF

            replacer = formatting.PlaceholderMaker(['p'], ['b'])

            # Formatting tags get replaced, and the content remains
            text = u'<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>'
            element = etree.fromstring(text)
            replacer.do_element(element)

            # The placeholders are expected to continue at \uf904.
            self.assertEqual(
                element.text,
                u'This \uf904 a \uf905 with \uf907some'
                u'\uf906 text.')

            try:
                # If this is a wide build, also test what happens if we
                # get over 8192 substitutions, and overflow the 2-byte code.
                # (On narrow builds this will give an error)
                formatting.PLACEHOLDER_START = 0xFFFF

                replacer = formatting.PlaceholderMaker(['p'], ['b'])

                # Formatting tags get replaced, and the content remains
                text = u'<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>'
                element = etree.fromstring(text)
                replacer.do_element(element)

                # This should raise an error on a narrow build
                self.assertEqual(
                    element.text,
                    u'This \U00010004 a \U00010005 with \U00010007some'
                    u'\U00010006 text.')
            except ValueError:
                if sys.maxunicode > 0x10000:
                    # This is a wide build, we should NOT get an error
                    raise

        finally:
            # Set it back
            formatting.PLACEHOLDER_START = orig_start
192
193
class XMLFormatTests(unittest.TestCase):
    """Checks the annotated XML that formatting.XMLFormatter produces when
    a single edit action is applied to a small document."""

    def _format_test(self, left, action, expected):
        # Format one action against the parsed ``left`` document and
        # compare the output with ``expected``.
        formatter = formatting.XMLFormatter(pretty_print=False)
        result = formatter.format([action], etree.fromstring(left))
        self.assertEqual(result, expected)

    def test_incorrect_xpaths(self):
        left = u'<document><node a="v"/><node>Text</node></document>'
        expected = START + u' diff:delete-attr="a">Text' + END

        # Only the formatting call sits inside assertRaises, so the test
        # cannot pass because of a ValueError raised while building the
        # action itself.

        # ``left`` has two <node> elements, so this xpath is ambiguous.
        action = actions.DeleteAttrib('/document/node', 'a')
        with self.assertRaises(ValueError):
            self._format_test(left, action, expected)

        # ``left`` has no <ummagumma> element, so this xpath finds nothing.
        action = actions.DeleteAttrib('/document/ummagumma', 'a')
        with self.assertRaises(ValueError):
            self._format_test(left, action, expected)

    def test_del_attr(self):
        left = u'<document><node a="v">Text</node></document>'
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = START + u' diff:delete-attr="a">Text' + END

        self._format_test(left, action, expected)

    def test_del_node(self):
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.DeleteNode('/document/node')
        expected = START + u' attr="val" diff:delete="">Text' + END

        self._format_test(left, action, expected)

    def test_del_text(self):
        # Updating the text to None is how text removal is expressed.
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.UpdateTextIn('/document/node', None)
        expected = START + u' attr="val"><diff:delete>Text</diff:delete>' + END

        self._format_test(left, action, expected)

    def test_insert_attr(self):
        left = u'<document><node>We need more text</node></document>'
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = START + u' attr="val" diff:add-attr="attr">'\
            u'We need more text' + END

        self._format_test(left, action, expected)

    def test_insert_node(self):
        left = u'<document></document>'
        action = actions.InsertNode('/document', 'node', 0)
        expected = START + u' diff:insert=""/></document>'

        self._format_test(left, action, expected)

    def test_move_attr(self):
        # The library currently only uses move attr for when attributes are
        # renamed:
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
        expected = START + u' bottr="val" diff:rename-attr="attr:bottr"'\
            u'>Text' + END

        self._format_test(left, action, expected)

    def test_move_node(self):
        # Move 1 down
        left = u'<document><node id="1" /><node id="2" /></document>'
        action = actions.MoveNode('/document/node[1]', '/document', 1)
        expected = START + u' id="1" diff:delete=""/><node id="2"/><node '\
            u'id="1" diff:insert=""/></document>'

        self._format_test(left, action, expected)

        # Move 2 up (same result, different diff)
        left = u'<document><node id="1" /><node id="2" /></document>'
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = START + u' id="2" diff:insert=""/><node id="1"/><node '\
            u'id="2" diff:delete=""/></document>'

        self._format_test(left, action, expected)

    def test_rename_node(self):
        left = u'<document><node><para>Content</para>Tail</node></document>'
        action = actions.RenameNode('/document/node[1]/para[1]', 'newtag')
        expected = START + u'><newtag diff:rename="para">Content'\
            u'</newtag>Tail' + END

        self._format_test(left, action, expected)

    def test_update_attr(self):
        left = u'<document><node attr="val"/></document>'
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = START + u' attr="newval" diff:update-attr="attr:val"/>'\
            u'</document>'

        self._format_test(left, action, expected)

    def test_update_text_in(self):
        # Text added to a node that had none.
        left = u'<document><node attr="val"/></document>'
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = START + u' attr="val"><diff:insert>Text</diff:insert>' + END

        self._format_test(left, action, expected)

        # Existing text partly rewritten: only the changed runs are wrapped.
        left = u'<document><node>This is a bit of text, right' + END
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, rick')
        expected = START + u'><diff:delete>This is</diff:delete><diff:insert>'\
            u'Also</diff:insert> a bit of text, ri<diff:delete>ght'\
            u'</diff:delete><diff:insert>ck</diff:insert>' + END

        self._format_test(left, action, expected)

    def test_update_text_after_1(self):
        left = u'<document><node/><node/></document>'
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = START + u'/><diff:insert>Text</diff:insert>'\
            u'<node/></document>'

        self._format_test(left, action, expected)

    def test_update_text_after_2(self):
        left = u'<document><node/>This is a bit of text, right</document>'
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = START + u'/><diff:delete>This is</diff:delete>'\
            u'<diff:insert>Also</diff:insert> a bit of text, ri<diff:delete>'\
            u'ght</diff:delete><diff:insert>ck</diff:insert></document>'

        self._format_test(left, action, expected)
325
326
class DiffFormatTests(unittest.TestCase):
    """Checks the one-line text that formatting.DiffFormatter emits for
    each kind of action."""

    def _format_test(self, action, expected):
        # Format a single action; None is passed as the original tree.
        formatter = formatting.DiffFormatter()
        result = formatter.format([action], None)
        self.assertEqual(result, expected)

    def test_del_attr(self):
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = '[delete-attribute, /document/node, a]'
        self._format_test(action, expected)

    def test_del_node(self):
        action = actions.DeleteNode('/document/node')
        expected = '[delete, /document/node]'
        self._format_test(action, expected)

    def test_del_text(self):
        # A None text value is written out as ``null``.
        action = actions.UpdateTextIn('/document/node', None)
        expected = '[update-text, /document/node, null]'
        self._format_test(action, expected)

    def test_insert_attr(self):
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = '[insert-attribute, /document/node, attr, "val"]'
        self._format_test(action, expected)

    def test_insert_node(self):
        action = actions.InsertNode('/document', 'node', 0)
        expected = '[insert, /document, node, 0]'
        self._format_test(action, expected)

    def test_rename_attr(self):
        action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
        expected = '[rename-attribute, /document/node, attr, bottr]'
        self._format_test(action, expected)

    def test_move_node(self):
        # Move 1 down
        action = actions.MoveNode('/document/node[1]', '/document', 1)
        expected = '[move, /document/node[1], /document, 1]'
        self._format_test(action, expected)

        # Move 2 up (same result, different diff)
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = '[move, /document/node[2], /document, 0]'

        self._format_test(action, expected)

    def test_rename_node(self):
        # Only the rename is checked here; the MoveNode assertion that used
        # to follow was an exact copy of the second case in test_move_node.
        action = actions.RenameNode('/document/node[1]', 'newtag')
        expected = '[rename, /document/node[1], newtag]'
        self._format_test(action, expected)

    def test_update_attr(self):
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = '[update-attribute, /document/node, attr, "newval"]'
        self._format_test(action, expected)

    def test_update_text_in(self):
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = '[update-text, /document/node, "Text"]'
        self._format_test(action, expected)

        # Double quotes inside the text come out backslash-escaped.
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, "rick"')
        expected = '[update-text, /document/node, '\
            u'"Also a bit of text, \\"rick\\""]'
        self._format_test(action, expected)

    def test_update_text_after_1(self):
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = '[update-text-after, /document/node[1], "Text"]'
        self._format_test(action, expected)

    def test_update_text_after_2(self):
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = '[update-text-after, /document/node, '\
            u'"Also a bit of text, rick"]'
        self._format_test(action, expected)

    def test_insert_comment(self):
        action = actions.InsertComment('/document/node', 2, 'Commentary')
        expected = '[insert-comment, /document/node, 2, "Commentary"]'
        self._format_test(action, expected)
420
421
class XmlDiffFormatTests(unittest.TestCase):
    """Checks the text that formatting.XmlDiffFormatter emits."""

    # Per the original note on this class, RenameAttr and MoveNode need an
    # orig_tree, so they are covered by the all_actions test, which uses
    # test_data files. The one exception in the single-action tests is a
    # MoveNode to position 0, which test_move_node_first formats with
    # None as the original tree.

    def _format_test(self, action, expected):
        # Format a single action; None is passed as the original tree.
        formatter = formatting.XmlDiffFormatter()
        result = formatter.format([action], None)
        self.assertEqual(result, expected)

    def test_del_attr(self):
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = '[remove, /document/node/@a]'
        self._format_test(action, expected)

    def test_del_node(self):
        action = actions.DeleteNode('/document/node')
        expected = '[remove, /document/node]'
        self._format_test(action, expected)

    def test_del_text(self):
        action = actions.UpdateTextIn('/document/node', None)
        expected = '[update, /document/node/text()[1], null]'
        self._format_test(action, expected)

    def test_insert_attr(self):
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = '[insert, /document/node, \n<@attr>\nval\n</@attr>]'
        self._format_test(action, expected)

    def test_insert_node(self):
        action = actions.InsertNode('/document', 'node', 0)
        expected = '[insert-first, /document, \n<node/>]'
        self._format_test(action, expected)

    def test_rename_node(self):
        action = actions.RenameNode('/document/node[1]', 'newtag')
        expected = '[rename, /document/node[1], newtag]'
        self._format_test(action, expected)

    def test_move_node_first(self):
        # Split out of test_rename_node so that a failure here is reported
        # as a move failure. A move to position 0 is written as
        # ``move-first``.
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = '[move-first, /document/node[2], /document]'
        self._format_test(action, expected)

    def test_update_attr(self):
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = '[update, /document/node/@attr, "newval"]'
        self._format_test(action, expected)

    def test_update_text_in(self):
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = '[update, /document/node/text()[1], "Text"]'
        self._format_test(action, expected)

        # Double quotes inside the text come out backslash-escaped.
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, "rick"')
        expected = '[update, /document/node/text()[1], '\
            u'"Also a bit of text, \\"rick\\""]'
        self._format_test(action, expected)

    def test_update_text_after_1(self):
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = '[update, /document/node[1]/text()[2], "Text"]'
        self._format_test(action, expected)

    def test_update_text_after_2(self):
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = '[update, /document/node/text()[2], '\
            u'"Also a bit of text, rick"]'
        self._format_test(action, expected)

    def test_all_actions(self):
        # Diff the two fixture files that sit in test_data next to this
        # module and compare the complete formatted output.
        here = os.path.dirname(__file__)
        lfile = os.path.join(here, 'test_data', 'all_actions.left.xml')
        rfile = os.path.join(here, 'test_data', 'all_actions.right.xml')

        formatter = formatting.XmlDiffFormatter()
        result = main.diff_files(lfile, rfile, formatter=formatter)
        expected = (
            u'[move-after, /document/node[2], /document/tag[1]]\n'
            u'[insert-comment, /document[1], 0,  Insert a new comment ]\n'
            u'[update, /document/node[1]/@name, "was updated"]\n'
            u'[remove, /document/node[1]/@attribute]\n'
            u'[insert, /document/node[1], \n'
            u'<@newtribute>\n'
            u'renamed\n'
            u'</@newtribute>]\n'
            u'[insert, /document/node[1], \n'
            u'<@this>\n'
            u'is new\n'
            u'</@this>]\n'
            u'[remove, /document/node[1]/@attr]\n'
            u'[update, /document/node[1]/text()[1], "\\n    Modified\\n  "]\n'
            u'[update, /document/node[1]/text()[2], "\\n    '
            u'New tail content\\n  "]\n'
            u'[rename, /document/node[2], nod]\n'
            u'[insert-after, /document/tail[1], \n'
            u'<new/>]\n'
            u'[remove, /document/tail[1]]'
        )
        self.assertEqual(result, expected)
526
527
class FormatterFileTests(unittest.TestCase):
    """Shared base for the file-based formatter test classes."""

    # Let unittest print failure diffs in full, whatever their length.
    maxDiff = None
    # Subclasses replace this with the formatter instance to diff with.
    formatter = None

    def process(self, left, right):
        # Diff the two files with this class's formatter and hand back
        # whatever main.diff_files produces.
        diff_output = main.diff_files(left, right, formatter=self.formatter)
        return diff_output
535
536
class XMLFormatterFileTests(FormatterFileTests):
    """File-based cases run through an XMLFormatter used as plain XML."""

    # No text_tags or formatting_tags are passed to this formatter; only
    # whitespace normalization and compact (non-pretty) output are set.
    formatter = formatting.XMLFormatter(
        normalize=formatting.WS_TEXT,
        pretty_print=False,
    )
542
543
544# Also test the bits that handle text tags:
545
class HTMLFormatterFileTests(FormatterFileTests):
    """File-based cases run through an XMLFormatter set up with HTML-like
    text and formatting tags."""

    # A handful of tags is enough for the placeholder tests.
    # <br/> is deliberately absent from both lists, to test an edge case
    # with empty non-formatting tags in text.
    formatter = formatting.XMLFormatter(
        pretty_print=True,
        normalize=formatting.WS_BOTH,
        text_tags=(
            'p',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'li',
        ),
        formatting_tags=(
            'b', 'u', 'i', 'strike', 'em',
            'super', 'sup', 'sub',
            'link', 'a', 'span',
        ),
    )
557
558
# Both generated suites read their cases from the ``test_data`` directory
# that sits next to this module, so the path is computed once.
data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

# Add tests that use no placeholder replacement (ie plain XML)
generate_filebased_cases(data_dir, XMLFormatterFileTests)

# Add tests that use placeholder replacement (ie HTML)
generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix='html')
566