# -*- coding: UTF-8 -*-
"""Tests for the placeholder handling and formatters in xmldiff.formatting."""
import os
import sys
import unittest

from lxml import etree
from xmldiff import formatting, main, actions

from .testing import generate_filebased_cases

# Common prefix/suffix of the XML documents the XMLFormatter tests expect.
START = u'<document xmlns:diff="http://namespaces.shoobx.com/diff"><node'
END = u'</node></document>'


class PlaceholderMakerTests(unittest.TestCase):
    """Tests for formatting.PlaceholderMaker."""

    def test_get_placeholder(self):
        replacer = formatting.PlaceholderMaker()
        # Get a placeholder:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_OPEN, None)
        self.assertEqual(ph, u'\ue005')
        # Asking again for an equivalent tag yields the same placeholder:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_OPEN, None)
        self.assertEqual(ph, u'\ue005')
        # A closing tag gets a placeholder of its own:
        ph = replacer.get_placeholder(
            etree.Element('tag'), formatting.T_CLOSE, ph)
        self.assertEqual(ph, u'\ue006')

    def test_do_element(self):
        replacer = formatting.PlaceholderMaker(['p'], ['b'])

        # Formatting tags get replaced, and the content remains
        text = u'<p>This is a tag with <b>formatted</b> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        self.assertEqual(
            etree.tounicode(element),
            u'<p>This is a tag with \ue006formatted\ue005 text.</p>')

        replacer.undo_element(element)
        self.assertEqual(etree.tounicode(element), text)

        # Non formatting tags get replaced with content
        text = u'<p>This is a tag with <foo>formatted</foo> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)
        result = etree.tounicode(element)
        self.assertEqual(
            result,
            u'<p>This is a tag with \ue007 text.</p>')

        # Single formatting tags still get two placeholders.
        text = u'<p>This is a <b/> with <foo/> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)
        result = etree.tounicode(element)
        self.assertEqual(
            result,
            u'<p>This is a \ue009\ue008 with \ue00a text.</p>')

    def test_do_undo_element(self):
        replacer = formatting.PlaceholderMaker(['p'], ['b'])

        # Formatting tags get replaced, and the content remains
        text = u'<p>This <is/> a <f>tag</f> with <b>formatted</b> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        self.assertEqual(
            element.text,
            u'This \ue005 a \ue006 with \ue008formatted'
            u'\ue007 text.')

        # Undoing must give back exactly the markup we started with.
        replacer.undo_element(element)
        result = etree.tounicode(element)
        self.assertEqual(result, text)

    def test_do_undo_element_double_format(self):
        replacer = formatting.PlaceholderMaker(['p'], ['b', 'u'])

        # Nested formatting tags get replaced, and the content remains
        text = u'<p>This is <u>doubly <b>formatted</b></u> text.</p>'
        element = etree.fromstring(text)
        replacer.do_element(element)

        self.assertEqual(
            element.text,
            u'This is \ue006doubly \ue008formatted\ue007'
            u'\ue005 text.')

        replacer.undo_element(element)
        result = etree.tounicode(element)
        self.assertEqual(result, text)

    def test_rml_bug(self):
        etree.register_namespace(formatting.DIFF_PREFIX, formatting.DIFF_NS)
        # NOTE(review): the indentation inside the three XML literals below
        # was reconstructed; after_diff and expected must keep identical
        # whitespace for the final comparison to hold -- confirm against
        # the upstream file.
        before_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <ref>4</ref>.
      <u><b>At Will Employment</b></u>
      .\u201cText\u201d
    </para>
  </section>
</document>"""
        tree = etree.fromstring(before_diff)
        replacer = formatting.PlaceholderMaker(
            text_tags=('para',), formatting_tags=('b', 'u', 'i',))
        replacer.do_tree(tree)
        after_diff = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert>\ue005</insert>.
      \ue007\ue009At Will Employment\ue008\ue006
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""

        # The diff formatting will find some text to insert.
        delete_attrib = u'{%s}delete-format' % formatting.DIFF_NS
        # Flag the elements behind these two placeholders as deleted
        # formatting.
        for placeholder in (u'\ue006', u'\ue007'):
            entry = replacer.placeholder2tag[placeholder]
            entry.element.attrib[delete_attrib] = ''
        tree = etree.fromstring(after_diff)
        replacer.undo_tree(tree)
        result = etree.tounicode(tree)
        expected = u"""<document xmlns:diff="http://namespaces.shoobx.com/diff">
  <section>
    <para>
      <insert><ref>4</ref></insert>.
      <u diff:delete-format=""><b>At Will Employment</b></u>
      .\u201c<insert>New </insert>Text\u201d
    </para>
  </section>
</document>"""
        self.assertEqual(result, expected)

    def test_placeholder_overflow(self):
        # PY3: This whole test is Python 2 support.
        # Test what happens when we have more than 6400 placeholders,
        # by patching the placeholder start.
        #
        # Remember the real value *before* entering the try block, so the
        # finally clause can always restore it.
        orig_start = formatting.PLACEHOLDER_START
        try:
            # This is the last character of the Private use area
            formatting.PLACEHOLDER_START = 0xF8FF

            replacer = formatting.PlaceholderMaker(['p'], ['b'])

            # Formatting tags get replaced, and the content remains
            text = u'<p>This <is/> a <f>tag</f> with <b>some</b> text.</p>'
            element = etree.fromstring(text)
            replacer.do_element(element)

            self.assertEqual(
                element.text,
                u'This \uf904 a \uf905 with \uf907some'
                u'\uf906 text.')

            try:
                # If this is a wide build, also test what happens if we
                # get over 8192 substitutions, and overflow the 2-byte code.
                # (On narrow builds this will give an error)
                formatting.PLACEHOLDER_START = 0xFFFF

                replacer = formatting.PlaceholderMaker(['p'], ['b'])

                # Formatting tags get replaced, and the content remains
                text = (u'<p>This <is/> a <f>tag</f> with <b>some</b> '
                        u'text.</p>')
                element = etree.fromstring(text)
                replacer.do_element(element)

                # This should raise an error on a narrow build
                self.assertEqual(
                    element.text,
                    u'This \U00010004 a \U00010005 with \U00010007some'
                    u'\U00010006 text.')
            except ValueError:
                if sys.maxunicode > 0x10000:
                    # This is a wide build, we should NOT get an error
                    raise

        finally:
            # Set it back
            formatting.PLACEHOLDER_START = orig_start


class XMLFormatTests(unittest.TestCase):
    """Tests that apply one action with formatting.XMLFormatter."""

    def _format_test(self, left, action, expected):
        # Format a single action against the parsed ``left`` document and
        # compare the formatter's output with ``expected``.
        formatter = formatting.XMLFormatter(pretty_print=False)
        result = formatter.format([action], etree.fromstring(left))
        self.assertEqual(result, expected)

    def test_incorrect_xpaths(self):
        left = u'<document><node a="v"/><node>Text</node></document>'
        expected = START + u' diff:delete-attr="a">Text' + END

        # The actions are built outside the assertRaises blocks so that
        # only the formatting step can satisfy the expected ValueError.

        # '/document/node' matches both <node> elements in ``left``.
        action = actions.DeleteAttrib('/document/node', 'a')
        with self.assertRaises(ValueError):
            self._format_test(left, action, expected)

        # '/document/ummagumma' matches nothing in ``left``.
        action = actions.DeleteAttrib('/document/ummagumma', 'a')
        with self.assertRaises(ValueError):
            self._format_test(left, action, expected)

    def test_del_attr(self):
        left = u'<document><node a="v">Text</node></document>'
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = START + u' diff:delete-attr="a">Text' + END

        self._format_test(left, action, expected)

    def test_del_node(self):
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.DeleteNode('/document/node')
        expected = START + u' attr="val" diff:delete="">Text' + END

        self._format_test(left, action, expected)

    def test_del_text(self):
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.UpdateTextIn('/document/node', None)
        expected = START + u' attr="val"><diff:delete>Text</diff:delete>' + END

        self._format_test(left, action, expected)

    def test_insert_attr(self):
        left = u'<document><node>We need more text</node></document>'
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = START + u' attr="val" diff:add-attr="attr">'\
            u'We need more text' + END

        self._format_test(left, action, expected)

    def test_insert_node(self):
        left = u'<document></document>'
        action = actions.InsertNode('/document', 'node', 0)
        expected = START + u' diff:insert=""/></document>'

        self._format_test(left, action, expected)

    def test_move_attr(self):
        # The library currently only uses move attr for when attributes are
        # renamed:
        left = u'<document><node attr="val">Text</node></document>'
        action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
        expected = START + u' bottr="val" diff:rename-attr="attr:bottr"'\
            u'>Text' + END

        self._format_test(left, action, expected)

    def test_move_node(self):
        # Move 1 down
        left = u'<document><node id="1" /><node id="2" /></document>'
        action = actions.MoveNode('/document/node[1]', '/document', 1)
        expected = START + u' id="1" diff:delete=""/><node id="2"/><node '\
            u'id="1" diff:insert=""/></document>'

        self._format_test(left, action, expected)

        # Move 2 up (same result, different diff)
        left = u'<document><node id="1" /><node id="2" /></document>'
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = START + u' id="2" diff:insert=""/><node id="1"/><node '\
            u'id="2" diff:delete=""/></document>'

        self._format_test(left, action, expected)

    def test_rename_node(self):
        left = u'<document><node><para>Content</para>Tail</node></document>'
        action = actions.RenameNode('/document/node[1]/para[1]', 'newtag')
        expected = START + u'><newtag diff:rename="para">Content'\
            '</newtag>Tail' + END

        self._format_test(left, action, expected)

    def test_update_attr(self):
        left = u'<document><node attr="val"/></document>'
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = START + u' attr="newval" diff:update-attr="attr:val"/>'\
            u'</document>'

        self._format_test(left, action, expected)

    def test_update_text_in(self):
        # Adding text to a node that had none:
        left = u'<document><node attr="val"/></document>'
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = START + u' attr="val"><diff:insert>Text</diff:insert>' + END

        self._format_test(left, action, expected)

        # Changing existing text marks up only the parts that differ:
        left = u'<document><node>This is a bit of text, right' + END
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, rick')
        expected = START + u'><diff:delete>This is</diff:delete><diff:insert>'\
            u'Also</diff:insert> a bit of text, ri<diff:delete>ght'\
            u'</diff:delete><diff:insert>ck</diff:insert>' + END

        self._format_test(left, action, expected)

    def test_update_text_after_1(self):
        left = u'<document><node/><node/></document>'
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = START + u'/><diff:insert>Text</diff:insert>'\
            u'<node/></document>'

        self._format_test(left, action, expected)

    def test_update_text_after_2(self):
        left = u'<document><node/>This is a bit of text, right</document>'
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = START + u'/><diff:delete>This is</diff:delete>'\
            u'<diff:insert>Also</diff:insert> a bit of text, ri<diff:delete>'\
            u'ght</diff:delete><diff:insert>ck</diff:insert></document>'

        self._format_test(left, action, expected)
class DiffFormatTests(unittest.TestCase):
    """Tests that format one action with formatting.DiffFormatter."""

    def _format_test(self, action, expected):
        # Format a single action (no original tree is passed) and compare
        # the formatter's output with ``expected``.
        formatter = formatting.DiffFormatter()
        result = formatter.format([action], None)
        self.assertEqual(result, expected)

    def test_del_attr(self):
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = '[delete-attribute, /document/node, a]'
        self._format_test(action, expected)

    def test_del_node(self):
        action = actions.DeleteNode('/document/node')
        expected = '[delete, /document/node]'
        self._format_test(action, expected)

    def test_del_text(self):
        action = actions.UpdateTextIn('/document/node', None)
        expected = '[update-text, /document/node, null]'
        self._format_test(action, expected)

    def test_insert_attr(self):
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = '[insert-attribute, /document/node, attr, "val"]'
        self._format_test(action, expected)

    def test_insert_node(self):
        action = actions.InsertNode('/document', 'node', 0)
        expected = '[insert, /document, node, 0]'
        self._format_test(action, expected)

    def test_rename_attr(self):
        action = actions.RenameAttrib('/document/node', 'attr', 'bottr')
        expected = '[rename-attribute, /document/node, attr, bottr]'
        self._format_test(action, expected)

    def test_move_node(self):
        # Move 1 down
        action = actions.MoveNode('/document/node[1]', '/document', 1)
        expected = '[move, /document/node[1], /document, 1]'
        self._format_test(action, expected)

        # Move 2 up (same result, different diff)
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = '[move, /document/node[2], /document, 0]'

        self._format_test(action, expected)

    def test_rename_node(self):
        # Rename the first node
        action = actions.RenameNode('/document/node[1]', 'newtag')
        expected = '[rename, /document/node[1], newtag]'
        self._format_test(action, expected)

        # A move is checked here as well (the same case as the second
        # half of test_move_node).
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = '[move, /document/node[2], /document, 0]'

        self._format_test(action, expected)

    def test_update_attr(self):
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = '[update-attribute, /document/node, attr, "newval"]'
        self._format_test(action, expected)

    def test_update_text_in(self):
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = '[update-text, /document/node, "Text"]'
        self._format_test(action, expected)

        # Double quotes inside the text are backslash-escaped in the output.
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, "rick"')
        expected = '[update-text, /document/node, '\
            u'"Also a bit of text, \\"rick\\""]'
        self._format_test(action, expected)

    def test_update_text_after_1(self):
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = '[update-text-after, /document/node[1], "Text"]'
        self._format_test(action, expected)

    def test_update_text_after_2(self):
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = '[update-text-after, /document/node, '\
            u'"Also a bit of text, rick"]'
        self._format_test(action, expected)

    def test_insert_comment(self):
        action = actions.InsertComment('/document/node', 2, 'Commentary')
        expected = '[insert-comment, /document/node, 2, "Commentary"]'
        self._format_test(action, expected)


class XmlDiffFormatTests(unittest.TestCase):
    """Tests that format one action with formatting.XmlDiffFormatter."""
    # RenameAttr and MoveNode require an orig_tree, so they are (mostly)
    # not tested in the _format_test tests, but in the all_actions test,
    # which uses test_data files. The one MoveNode case checked without a
    # tree is the move to position 0 in test_rename_node.

    def _format_test(self, action, expected):
        # Format a single action (no original tree is passed) and compare
        # the formatter's output with ``expected``.
        formatter = formatting.XmlDiffFormatter()
        result = formatter.format([action], None)
        self.assertEqual(result, expected)

    def test_del_attr(self):
        action = actions.DeleteAttrib('/document/node', 'a')
        expected = '[remove, /document/node/@a]'
        self._format_test(action, expected)

    def test_del_node(self):
        action = actions.DeleteNode('/document/node')
        expected = '[remove, /document/node]'
        self._format_test(action, expected)

    def test_del_text(self):
        action = actions.UpdateTextIn('/document/node', None)
        expected = '[update, /document/node/text()[1], null]'
        self._format_test(action, expected)

    def test_insert_attr(self):
        action = actions.InsertAttrib('/document/node', 'attr', 'val')
        expected = '[insert, /document/node, \n<@attr>\nval\n</@attr>]'
        self._format_test(action, expected)

    def test_insert_node(self):
        action = actions.InsertNode('/document', 'node', 0)
        expected = '[insert-first, /document, \n<node/>]'
        self._format_test(action, expected)

    def test_rename_node(self):
        # Rename the first node
        action = actions.RenameNode('/document/node[1]', 'newtag')
        expected = '[rename, /document/node[1], newtag]'
        self._format_test(action, expected)

        # Moving a node to position 0 is formatted as "move-first"
        action = actions.MoveNode('/document/node[2]', '/document', 0)
        expected = '[move-first, /document/node[2], /document]'
        self._format_test(action, expected)

    def test_update_attr(self):
        action = actions.UpdateAttrib('/document/node', 'attr', 'newval')
        expected = '[update, /document/node/@attr, "newval"]'
        self._format_test(action, expected)

    def test_update_text_in(self):
        action = actions.UpdateTextIn('/document/node', 'Text')
        expected = '[update, /document/node/text()[1], "Text"]'
        self._format_test(action, expected)

        # Double quotes inside the text are backslash-escaped in the output.
        action = actions.UpdateTextIn('/document/node',
                                      'Also a bit of text, "rick"')
        expected = '[update, /document/node/text()[1], '\
            u'"Also a bit of text, \\"rick\\""]'
        self._format_test(action, expected)

    def test_update_text_after_1(self):
        action = actions.UpdateTextAfter('/document/node[1]', 'Text')
        expected = '[update, /document/node[1]/text()[2], "Text"]'
        self._format_test(action, expected)

    def test_update_text_after_2(self):
        action = actions.UpdateTextAfter('/document/node',
                                         'Also a bit of text, rick')
        expected = '[update, /document/node/text()[2], '\
            u'"Also a bit of text, rick"]'
        self._format_test(action, expected)

    def test_all_actions(self):
        here = os.path.dirname(__file__)
        lfile = os.path.join(here, 'test_data', 'all_actions.left.xml')
        rfile = os.path.join(here, 'test_data', 'all_actions.right.xml')

        formatter = formatting.XmlDiffFormatter()
        result = main.diff_files(lfile, rfile, formatter=formatter)
        # NOTE(review): runs of spaces inside these literals (after "0,"
        # and around the "\\n" escapes) may have been collapsed in the copy
        # this was restored from -- verify against the test_data files.
        expected = (
            u'[move-after, /document/node[2], /document/tag[1]]\n'
            u'[insert-comment, /document[1], 0, Insert a new comment ]\n'
            u'[update, /document/node[1]/@name, "was updated"]\n'
            u'[remove, /document/node[1]/@attribute]\n'
            u'[insert, /document/node[1], \n'
            u'<@newtribute>\n'
            u'renamed\n'
            u'</@newtribute>]\n'
            u'[insert, /document/node[1], \n'
            u'<@this>\n'
            u'is new\n'
            u'</@this>]\n'
            u'[remove, /document/node[1]/@attr]\n'
            u'[update, /document/node[1]/text()[1], "\\n Modified\\n "]\n'
            u'[update, /document/node[1]/text()[2], "\\n '
            u'New tail content\\n "]\n'
            u'[rename, /document/node[2], nod]\n'
            u'[insert-after, /document/tail[1], \n'
            u'<new/>]\n'
            u'[remove, /document/tail[1]]'
        )
        self.assertEqual(result, expected)


class FormatterFileTests(unittest.TestCase):
    """Base class for the file-based test cases generated below."""

    formatter = None  # Override this
    maxDiff = None  # Always show the complete diff on failure

    def process(self, left, right):
        # Diff the two files using the formatter configured on the class.
        return main.diff_files(left, right, formatter=self.formatter)


class XMLFormatterFileTests(FormatterFileTests):

    # This XMLFormatter is given no text or formatting tags, so these
    # cases run without placeholder replacement (ie plain XML).
    formatter = formatting.XMLFormatter(pretty_print=False,
                                        normalize=formatting.WS_TEXT)


# Also test the bits that handle text tags:

class HTMLFormatterFileTests(FormatterFileTests):

    # We use a few tags for the placeholder tests.
    # <br/> is intentionally left out, to test an edge case
    # with empty non-formatting tags in text.
    formatter = formatting.XMLFormatter(
        normalize=formatting.WS_BOTH,
        pretty_print=True,
        text_tags=('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'),
        formatting_tags=('b', 'u', 'i', 'strike', 'em', 'super',
                         'sup', 'sub', 'link', 'a', 'span'))


# Both sets of generated cases read from the same directory.
data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

# Add tests that use no placeholder replacement (ie plain XML)
generate_filebased_cases(data_dir, XMLFormatterFileTests)

# Add tests that use placeholder replacement (ie HTML)
generate_filebased_cases(data_dir, HTMLFormatterFileTests, suffix='html')