"""Tests for the html2po converter, including round trips through po2html."""

import os
from io import BytesIO

from translate.convert import html2po, po2html, test_convert


class TestHTML2PO:
    """Convert in-memory HTML snippets to PO and check the extracted units."""

    def html2po(
        self,
        markup,
        duplicatestyle="msgctxt",
        keepcomments=False,
    ):
        """Helper to convert html to po without a file.

        ``markup`` may be ``str`` (encoded with the default codec) or ``bytes``
        (passed through untouched, so tests can control the input encoding).
        """
        inputfile = BytesIO(markup.encode() if isinstance(markup, str) else markup)
        convertor = html2po.html2po()
        return convertor.convertfile(inputfile, "test", duplicatestyle, keepcomments)

    def po2html(self, posource, htmltemplate):
        """Helper to convert po to html without a file.

        ``posource`` is anything ``bytes()`` accepts (a pofile object or raw
        bytes); ``htmltemplate`` is the original HTML as ``str``.
        Returns the translated HTML decoded as UTF-8.
        """
        # Convert pofile object to bytes
        inputfile = BytesIO(bytes(posource))
        outputfile = BytesIO()
        templatefile = BytesIO(htmltemplate.encode())
        assert po2html.converthtml(inputfile, outputfile, templatefile)
        return outputfile.getvalue().decode("utf-8")

    def countunits(self, pofile, expected):
        """Helper to check that we got the expected number of messages.

        A leading header unit, if present, is not counted.
        """
        actual = len(pofile.units)
        if actual > 0 and pofile.units[0].isheader():
            actual -= 1
        print(pofile)
        assert actual == expected

    def compareunit(self, pofile, unitnumber, expected):
        """Helper to validate a PO message.

        ``unitnumber`` is 1-based and excludes the header; when the file has no
        header the index is shifted down by one to compensate.
        """
        if not pofile.units[0].isheader():
            unitnumber = unitnumber - 1
        unit = pofile.units[unitnumber]
        print(f"unit source: {unit.source}|")
        print(f"expected: {expected}|")
        assert str(unit.source) == str(expected)

    def check_single(self, markup, itemtext):
        """Check that converting this markup produces a single element with value itemtext."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 1)
        self.compareunit(pofile, 1, itemtext)

    def check_null(self, markup):
        """Check that converting this markup produces no elements."""
        pofile = self.html2po(markup)
        self.countunits(pofile, 0)

    def check_phpsnippet(self, php):
        """Given a snippet of php, put it into an HTML shell and see if the results are as expected."""
        # PHP inside an attribute of a link that is the only content of the paragraph.
        self.check_single(
            f'<html><head></head><body><p><a href="{php}/site.html">Body text</a></p></body></html>',
            "Body text",
        )
        # PHP inside an attribute of an inline link surrounded by other text.
        self.check_single(
            f'<html><head></head><body><p>More things in <a href="{php}/site.html">Body text</a></p></body></html>',
            f'More things in <a href="{php}/site.html">Body text</a>',
        )
        # PHP as the whole paragraph content.
        self.check_single(f"<html><head></head><body><p>{php}</p></body></html>", php)

    def test_extract_lang_attribute_from_html_tag(self):
        """Test that the lang attribute is extracted from the html tag, issue #3884."""
        markup = """<!DOCTYPE html>
<html lang="en">
    <head>
        <title>translate lang attribute</title>
    </head>
    <body>
    </body>
</html>
"""
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        self.compareunit(pofile, 1, "en")
        self.compareunit(pofile, 2, "translate lang attribute")

    def test_do_not_extract_lang_attribute_from_tags_other_than_html(self):
        """Test that the lang attribute is not extracted from tags other than html."""
        self.check_single('<p><span lang="fr">Français</span></p>', "Français")

    def test_title(self):
        """Test that we can extract the <title> tag."""
        self.check_single(
            "<html><head><title>My title</title></head><body></body></html>", "My title"
        )

    def test_title_with_linebreak(self):
        """Test a linebreak in the <title> tag."""
        htmltext = """<html>
<head>
  <title>My
title</title>
</head>
<body>
</body>
</html>
"""
        self.check_single(htmltext, "My title")

    def test_meta(self):
        """Test that we can extract certain <meta> info from <head>."""
        self.check_single(
            """<html><head><meta name="keywords" content="these are keywords"></head><body></body></html>""",
            "these are keywords",
        )

    def test_tag_p(self):
        """Test that we can extract the <p> tag."""
        self.check_single(
            "<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph."
        )

    def test_tag_p_with_br(self):
        """Test that we can extract the <p> tag with an embedded <br> element."""
        markup = "<p>First line.<br>Second line.</p>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_tag_p_with_linebreak(self):
        """Test newlines within the <p> tag."""
        htmltext = """<html>
<head>
</head>
<body>
<p>
A paragraph is a section in a piece of writing, usually highlighting a
particular point or topic. It always begins on a new line and usually
with indentation, and it consists of at least one sentence.
</p>
</body>
</html>
"""
        self.check_single(
            htmltext,
            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.",
        )

    def test_tag_p_with_linebreak_and_embedded_br(self):
        """Test newlines within the <p> tag when there is an embedded <br> element."""
        markup = "<p>First\nline.<br>Second\nline.</p>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_uppercase_html(self):
        """Should ignore the casing of the html tags."""
        self.check_single(
            "<HTML><HEAD></HEAD><BODY><P>A paragraph.</P></BODY></HTML>", "A paragraph."
        )

    def test_tag_div(self):
        """Test that we can extract the <div> tag."""
        self.check_single(
            "<html><head></head><body><div>A paragraph.</div></body></html>",
            "A paragraph.",
        )
        markup = "<div>First line.<br>Second line.</div>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_tag_div_with_linebreaks(self):
        """Test linebreaks within a <div> tag."""
        htmltext = """<html>
<head>
</head>
<body>
<div>
A paragraph is a section in a piece of writing, usually highlighting a
particular point or topic. It always begins on a new line and usually
with indentation, and it consists of at least one sentence.
</div>
</body>
</html>
"""
        self.check_single(
            htmltext,
            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.",
        )
        markup = "<div>First\nline.<br>Second\nline.</div>"
        pofile = self.html2po(markup)
        self.compareunit(pofile, 1, "First line.<br>Second line.")

    def test_tag_a(self):
        """Test that we can extract the <a> tag."""
        self.check_single(
            '<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>',
            'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.',
        )

    def test_tag_a_with_linebreak(self):
        """Test that we can extract the <a> tag with newlines in it."""
        htmltext = """<html>
<head>
</head>
<body>
<p>A
paragraph
with <a
href="http://translate.org.za/">hyperlink</a>
and
newlines.</p></body></html>
"""
        self.check_single(
            htmltext,
            'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.',
        )

    def test_sequence_of_anchor_elements(self):
        """Test that we can extract a sequence of anchor elements without mixing up start/end tags, issue #3768."""
        self.check_single(
            '<p><a href="http://example.com">This is a link</a> but this is not. <a href="http://example.com">However this is too</a></p>',
            '<a href="http://example.com">This is a link</a> but this is not. <a href="http://example.com">However this is too</a>',
        )

    def test_tag_img(self):
        """Test that we can extract the alt attribute from the <img> tag."""
        self.check_single(
            """<html><head></head><body><img src="picture.png" alt="A picture"></body></html>""",
            "A picture",
        )

    def test_img_empty(self):
        """Test that an <img> tag without an alt attribute produces no units."""
        htmlsource = """<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>"""
        self.check_null(htmlsource)

    def test_tag_img_inside_a(self):
        """Test that we can extract the alt attribute from the <img> tag when the img is embedded in a link."""
        self.check_single(
            """<html><head></head><body><p><a href="#"><img src="picture.png" alt="A picture" /></a></p></body></html>""",
            "A picture",
        )

    def test_tag_table_summary(self):
        """Test that we can extract the summary attribute."""
        self.check_single(
            """<html><head></head><body><table summary="Table summary"></table></body></html>""",
            "Table summary",
        )

    def test_table_simple(self):
        """Test that we can fully extract a simple table."""
        markup = """<html><head></head><body><table><tr><th>Heading One</th><th>Heading Two</th></tr><tr><td>One</td><td>Two</td></tr></table></body></html>"""
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "One")
        self.compareunit(pofile, 4, "Two")

    def test_table_complex(self):
        """Test a table with summary, caption, abbr, thead, tfoot and tbody content."""
        markup = """<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></tr></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>"""
        pofile = self.html2po(markup)
        self.countunits(pofile, 9)
        self.compareunit(pofile, 1, "This is the summary")
        self.compareunit(pofile, 2, "A caption")
        self.compareunit(pofile, 3, "Head 1")
        self.compareunit(pofile, 4, "Heading One")
        self.compareunit(pofile, 5, "Heading Two")
        self.compareunit(pofile, 6, "Foot One")
        self.compareunit(pofile, 7, "Foot Two")
        self.compareunit(pofile, 8, "One")
        self.compareunit(pofile, 9, "Two")

    def test_table_empty(self):
        """Test that we ignore tables that are empty.

        A table is deemed empty if it has no translatable content.
        """
        self.check_null(
            """<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>"""
        )
        self.check_null(
            """<html><head></head><body><table><tr><td> </td></tr></table></body></html>"""
        )
        self.check_null(
            """<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>"""
        )

    def test_address(self):
        """Test to see if the address element is extracted."""
        self.check_single("<body><address>My address</address></body>", "My address")

    def test_headings(self):
        """Test to see if the h* elements are extracted."""
        markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    def test_headings_with_linebreaks(self):
        """Test to see if h* elements with newlines can be extracted."""
        markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 6)
        self.compareunit(pofile, 1, "Heading One")
        self.compareunit(pofile, 2, "Heading Two")
        self.compareunit(pofile, 3, "Heading Three")
        self.compareunit(pofile, 4, "Heading Four")
        self.compareunit(pofile, 5, "Heading Five")
        self.compareunit(pofile, 6, "Heading Six")

    def test_dt(self):
        """Test to see if the definition list title (dt) element is extracted."""
        self.check_single(
            "<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>",
            "Definition List Item Title",
        )

    def test_dd(self):
        """Test to see if the definition list description (dd) element is extracted."""
        self.check_single(
            "<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>",
            "Definition List Item Description",
        )

    def test_span(self):
        """Test to check that we don't double extract a span item."""
        self.check_single(
            "<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>",
            "You are a <span>Spanish</span> sentence.",
        )

    def test_ul(self):
        """Test to see if the list item <li> is extracted."""
        markup = "<html><head></head><body><ul><li>Unordered One</li><li>Unordered Two</li></ul><ol><li>Ordered One</li><li>Ordered Two</li></ol></body></html>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "Unordered One")
        self.compareunit(pofile, 2, "Unordered Two")
        self.compareunit(pofile, 3, "Ordered One")
        self.compareunit(pofile, 4, "Ordered Two")

    def test_nested_lists(self):
        """Nested lists should be extracted correctly."""
        markup = """<!DOCTYPE html><html><head><title>Nested lists</title></head><body>
<ul>
    <li>Vegetables</li>
    <li>Fruit
        <ul>
            <li>Bananas</li>
            <li>Apples</li>
            <li>Pears</li>
        </ul>
        yeah, that should be enough
    </li>
    <li>Meat</li>
</ul>
</body></html>"""
        pofile = self.html2po(markup)
        self.countunits(pofile, 8)
        self.compareunit(pofile, 1, "Nested lists")
        self.compareunit(pofile, 2, "Vegetables")
        self.compareunit(pofile, 3, "Fruit")
        self.compareunit(pofile, 4, "Bananas")
        self.compareunit(pofile, 5, "Apples")
        self.compareunit(pofile, 6, "Pears")
        self.compareunit(pofile, 7, "yeah, that should be enough")
        self.compareunit(pofile, 8, "Meat")

    def test_duplicates(self):
        """Check that we use the default style of msgctxt to disambiguate duplicate messages."""
        markup = (
            "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
        )
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        # FIXME change this so that we check that the msgctxt is correctly added
        self.compareunit(pofile, 1, "Duplicate")
        assert pofile.units[1].getlocations() == ["None+html.body.p:1-26"]
        self.compareunit(pofile, 2, "Duplicate")
        assert pofile.units[2].getlocations() == ["None+html.body.p:1-42"]

    def test_multiline_reflow(self):
        """Check that we reflow multiline content to make it more readable for translators."""
        self.check_single(
            """<td valign="middle" width="96%"><font class="headingwhite">South
                  Africa</font></td>""",
            """South Africa""",
        )

    def test_nested_tags(self):
        """Check that we can extract items within nested tags."""
        markup = "<div><p>Extract this</p>And this</div>"
        pofile = self.html2po(markup)
        self.countunits(pofile, 2)
        self.compareunit(pofile, 1, "Extract this")
        self.compareunit(pofile, 2, "And this")

    def test_carriage_return(self):
        """Remove carriage returns from files in dos format."""
        htmlsource = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
<head>\r
<!-- InstanceBeginEditable name="doctitle" -->\r
<link href="fmfi.css" rel="stylesheet" type="text/css">\r
</head>\r
\r
<body>\r
<p>The rapid expansion of telecommunications infrastructure in recent\r
years has helped to bridge the digital divide to a limited extent.</p> \r
</body>\r
<!-- InstanceEnd --></html>\r
"""

        self.check_single(
            htmlsource,
            "The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.",
        )

    def test_encoding_latin1(self):
        """Convert HTML input in iso-8859-1 correctly to unicode.

        Also verifies that the charset declaration isn't extracted as a translation unit.
        """
        htmlsource = b"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
<head>
<!-- InstanceBeginEditable name="doctitle" -->
<title>FMFI - South Africa - CSIR Openphone - Overview</title>
<!-- InstanceEndEditable -->
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">

<!-- InstanceBeginEditable name="head" -->
<!-- InstanceEndEditable -->
<link href="../../../fmfi.css" rel="stylesheet" type="text/css">
</head>

<body>
<p>We aim to please \x96 will you aim too, please?</p>
<p>South Africa\x92s language diversity can be challenging.</p>
</body>
</html>
"""
        pofile = self.html2po(htmlsource)
        self.countunits(pofile, 4)
        self.compareunit(pofile, 1, "FMFI - South Africa - CSIR Openphone - Overview")
        self.compareunit(
            pofile,
            2,
            "fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community",
        )
        self.compareunit(pofile, 3, "We aim to please \x96 will you aim too, please?")
        self.compareunit(
            pofile, 4, "South Africa\x92s language diversity can be challenging."
        )

    def test_strip_html(self):
        """Ensure that unnecessary html is stripped from the resulting unit."""
        htmlsource = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>FMFI - Contact</title>
</head>
<body>
<table width="100%" border="0" cellpadding="0" cellspacing="0">
  <tr align="left" valign="top">
    <td width="150" height="556">
      <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
        <tr>
          <td align="left" valign="top" height="555">
            <table width="100%" border="0" cellspacing="0" cellpadding="2">
              <tr align="left" valign="top" bgcolor="#660000">
                <td width="4%"><strong></strong></td>
                <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
              </tr>
              <tr align="left" valign="top">
                <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
                <td width="96%"><a href="index.html">Home Page</a></td>
              </tr>
            </table>
          </td>
        </tr>
      </table>
    </td>
  </tr>
</table>
</body>
</html>
"""
        pofile = self.html2po(htmlsource)
        self.countunits(pofile, 3)
        self.compareunit(pofile, 2, "Projects")
        self.compareunit(pofile, 3, "Home Page")

        # Translate and convert back:
        pofile.units[2].target = "Projekte"
        pofile.units[3].target = "Tuisblad"
        # Normalise whitespace so the snippets below match regardless of how
        # the converter lays out the output.
        htmlresult = (
            self.po2html(pofile, htmlsource)
            .replace("\n", " ")
            .replace('= "', '="')
            .replace("> <", "><")
        )
        snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
        assert snippet in htmlresult
        snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>'
        assert snippet in htmlresult

    def test_entityrefs_in_text(self):
        """Should extract html entityrefs, preserving the ones representing reserved characters.

        `See <https://developer.mozilla.org/en-US/docs/Glossary/Entity>`.
        """
        # NOTE(review): the entity references in these literals were
        # reconstructed from a copy in which they had been decoded; confirm
        # against version control.
        self.check_single(
            "<html><head></head><body><p>&lt;not an element&gt; &amp; &quot; &apos; &rsquo;</p></body></html>",
            "&lt;not an element&gt; &amp; \" ' \u2019",
        )

    def test_entityrefs_in_attributes(self):
        """Should convert html entityrefs in attribute values."""
        # it would be even nicer if &quot; and &apos; could be preserved, but the automatic unescaping of
        # attributes is deep inside html.HTMLParser.
        # NOTE(review): the entity references in the markup were reconstructed
        # from a copy in which they had been decoded; confirm against version
        # control.
        self.check_single(
            '<html><head></head><body><img alt="&lt;not an element&gt; &amp; &quot; &apos; &rsquo;"></body></html>',
            "<not an element> & \" ' \u2019",
        )

    def test_charrefs(self):
        """Should extract html charrefs."""
        # NOTE(review): the decimal and hexadecimal character references were
        # reconstructed from a copy in which they had been decoded; confirm
        # against version control.
        self.check_single(
            "<html><head></head><body><p>&#8217; &#x2019;</p></body></html>",
            "\u2019 \u2019",
        )

    def test_php(self):
        """Test that PHP snippets don't interfere."""
        # A simple string
        self.check_phpsnippet("""<?=$phpvariable?>""")

        # Contains HTML tag characters (< and >)
        self.check_phpsnippet("""<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>""")

        # Make sure basically any symbol can be handled
        # NOTE quotation mark removed since it violates the HTML format when placed in an attribute
        self.check_phpsnippet(
            """<? asdfghjkl qwertyuiop 1234567890!@#$%^&*()-=_+[]\\{}|;':,./<>? ?>"""
        )

    def test_multiple_php(self):
        """Test multiple PHP snippets in a string to make sure they get restored properly."""
        php1 = """<?=$phpvariable?>"""
        php2 = """<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>"""
        php3 = """<? asdfghjklqwertyuiop1234567890!@#$%^&*()-=_+[]\\{}|;':",./<>? ?>"""

        # Put 3 different strings into an html string
        innertext = (
            f'<a href="{php1}/site.html">Body text</a> and some {php2}'
            f" more text {php2}{php3}"
        )
        htmlsource = f"<html><head></head><body><p>{innertext}</p></body></html>"
        self.check_single(htmlsource, innertext)

    def test_php_multiline(self):
        """Test that a multi-line PHP snippet survives an html2po/po2html round trip."""
        # A multi-line php string to test
        php1 = """<? abc
def
ghi ?>"""

        # Scatter the php strings throughout the file, and show what the translation should be
        innertext = (
            f'<a href="{php1}/site.html">Body text</a> and some {php1}'
            f" more text {php1}{php1}"
        )
        innertrans = (
            f'<a href="{php1}/site.html">Texte de corps</a> et encore de {php1}'
            f" plus de texte {php1}{php1}"
        )

        # Current html file
        htmlsource = f"<html><head></head><body><p>{innertext}</p></body></html>"
        # Expected translation
        transsource = f"<html><head></head><body><p>{innertrans}</p></body></html>"

        pofile = self.html2po(htmlsource)
        pofile.units[1].target = innertrans  # Register the translation in the PO file
        htmlresult = self.po2html(pofile, htmlsource)
        assert htmlresult == transsource

    def test_php_with_embedded_html(self):
        """Should not consume HTML within processing instructions."""
        self.check_single(
            "<html><head></head><body><p>a <? <p>b</p> ?> c</p></body></html>",
            "a <? <p>b</p> ?> c",
        )

    def test_comments(self):
        """Test that HTML comments are converted to translator notes in output."""
        pofile = self.html2po(
            "<!-- comment outside block --><p><!-- a comment -->A paragraph<!-- with another comment -->.</p>",
            keepcomments=True,
        )
        self.compareunit(pofile, 1, "A paragraph.")
        notes = pofile.getunits()[-1].getnotes()
        assert str(notes) == " a comment \n with another comment "

    def test_attribute_without_value(self):
        """Test that a valueless attribute (here ``download``) does not break extraction."""
        htmlsource = """<ul>
    <li><a href="logoColor.eps" download>EPS färg</a></li>
</ul>
"""
        pofile = self.html2po(htmlsource)
        self.compareunit(pofile, 1, "EPS färg")


class TestHTML2POCommand(test_convert.TestConvertCommand, TestHTML2PO):
    """Tests running actual html2po commands on files."""

    convertmodule = html2po
    defaultoptions = {"progress": "none"}

    def test_multifile_single(self):
        """Test the --multifile=single option and make sure it produces one pot file per input file."""
        self.create_testfile(
            "file1.html", "<div>You are only coming through in waves</div>"
        )
        self.create_testfile(
            "file2.html", "<div>Your lips move but I cannot hear what you say</div>"
        )
        self.run_command("./", "pots", pot=True, multifile="single")
        assert os.path.isfile(self.get_testfilename("pots/file1.pot"))
        assert os.path.isfile(self.get_testfilename("pots/file2.pot"))
        content = str(self.read_testfile("pots/file1.pot"))
        assert "coming through" in content
        assert "cannot hear" not in content

    def test_multifile_onefile(self):
        """Test the --multifile=onefile option and make sure it produces a file, not a directory."""
        self.create_testfile(
            "file1.html", "<div>You are only coming through in waves</div>"
        )
        self.create_testfile(
            "file2.html", "<div>Your lips move but I cannot hear what you say</div>"
        )
        self.run_command("./", "one.pot", pot=True, multifile="onefile")
        assert os.path.isfile(self.get_testfilename("one.pot"))
        content = str(self.read_testfile("one.pot"))
        assert "coming through" in content
        assert "cannot hear" in content

    def test_multifile_onefile_to_stdout(self, capsys):
        """Test the --multifile=onefile option without specifying an output file. Default is stdout."""
        self.create_testfile(
            "file1.html", "<div>You are only coming through in waves</div>"
        )
        self.create_testfile(
            "file2.html", "<div>Your lips move but I cannot hear what you say</div>"
        )
        self.run_command("./", pot=True, multifile="onefile")
        content, err = capsys.readouterr()
        assert "coming through" in content
        assert "cannot hear" in content
        assert err == ""

    def test_help(self, capsys):
        """Test getting help."""
        options = super().test_help(capsys)
        options = self.help_check(options, "-P, --pot")
        options = self.help_check(options, "--duplicates=DUPLICATESTYLE")
        options = self.help_check(options, "--keepcomments")
        options = self.help_check(options, "--multifile=MULTIFILESTYLE", last=True)