1import os
2from io import BytesIO
3
4from translate.convert import html2po, po2html, test_convert
5
6
7class TestHTML2PO:
8    def html2po(
9        self,
10        markup,
11        duplicatestyle="msgctxt",
12        keepcomments=False,
13    ):
14        """Helper to convert html to po without a file."""
15        inputfile = BytesIO(markup.encode() if isinstance(markup, str) else markup)
16        convertor = html2po.html2po()
17        return convertor.convertfile(inputfile, "test", duplicatestyle, keepcomments)
18
19    def po2html(self, posource, htmltemplate):
20        """Helper to convert po to html without a file."""
21        # Convert pofile object to bytes
22        inputfile = BytesIO(bytes(posource))
23        outputfile = BytesIO()
24        templatefile = BytesIO(htmltemplate.encode())
25        assert po2html.converthtml(inputfile, outputfile, templatefile)
26        return outputfile.getvalue().decode("utf-8")
27
28    def countunits(self, pofile, expected):
29        """helper to check that we got the expected number of messages"""
30        actual = len(pofile.units)
31        if actual > 0:
32            if pofile.units[0].isheader():
33                actual = actual - 1
34        print(pofile)
35        assert actual == expected
36
37    def compareunit(self, pofile, unitnumber, expected):
38        """helper to validate a PO message"""
39        if not pofile.units[0].isheader():
40            unitnumber = unitnumber - 1
41        print("unit source: " + pofile.units[unitnumber].source + "|")
42        print("expected: " + expected + "|")
43        assert str(pofile.units[unitnumber].source) == str(expected)
44
45    def check_single(self, markup, itemtext):
46        """checks that converting this markup produces a single element with value itemtext"""
47        pofile = self.html2po(markup)
48        self.countunits(pofile, 1)
49        self.compareunit(pofile, 1, itemtext)
50
51    def check_null(self, markup):
52        """checks that converting this markup produces no elements"""
53        pofile = self.html2po(markup)
54        self.countunits(pofile, 0)
55
56    def check_phpsnippet(self, php):
57        """Given a snippet of php, put it into an HTML shell and see if the results are as expected"""
58        self.check_single(
59            '<html><head></head><body><p><a href="'
60            + php
61            + '/site.html">Body text</a></p></body></html>',
62            "Body text",
63        )
64        self.check_single(
65            '<html><head></head><body><p>More things in <a href="'
66            + php
67            + '/site.html">Body text</a></p></body></html>',
68            'More things in <a href="' + php + '/site.html">Body text</a>',
69        )
70        self.check_single(
71            "<html><head></head><body><p>" + php + "</p></body></html>", php
72        )
73
74    def test_extract_lang_attribute_from_html_tag(self):
75        """Test that the lang attribute is extracted from the html tag, issue #3884"""
76        markup = """<!DOCTYPE html>
77<html lang="en">
78    <head>
79        <title>translate lang attribute</title>
80    </head>
81    <body>
82    </body>
83</html>
84"""
85        pofile = self.html2po(markup)
86        self.countunits(pofile, 2)
87        self.compareunit(pofile, 1, "en")
88        self.compareunit(pofile, 2, "translate lang attribute")
89
90    def test_do_not_extract_lang_attribute_from_tags_other_than_html(self):
91        """Test that the lang attribute is extracted from the html tag"""
92        self.check_single('<p><span lang="fr">Français</span></p>', "Français")
93
94    def test_title(self):
95        """test that we can extract the <title> tag"""
96        self.check_single(
97            "<html><head><title>My title</title></head><body></body></html>", "My title"
98        )
99
100    def test_title_with_linebreak(self):
101        """Test a linebreak in the <title> tag"""
102        htmltext = """<html>
103<head>
104  <title>My
105title</title>
106</head>
107<body>
108</body>
109</html>
110"""
111        self.check_single(htmltext, "My title")
112
113    def test_meta(self):
114        """Test that we can extract certain <meta> info from <head>."""
115        self.check_single(
116            """<html><head><meta name="keywords" content="these are keywords"></head><body></body></html>""",
117            "these are keywords",
118        )
119
120    def test_tag_p(self):
121        """test that we can extract the <p> tag"""
122        self.check_single(
123            "<html><head></head><body><p>A paragraph.</p></body></html>", "A paragraph."
124        )
125
126    def test_tag_p_with_br(self):
127        """test that we can extract the <p> tag with an embedded <br> element"""
128        markup = "<p>First line.<br>Second line.</p>"
129        pofile = self.html2po(markup)
130        self.compareunit(pofile, 1, "First line.<br>Second line.")
131
132    def test_tag_p_with_linebreak(self):
133        """Test newlines within the <p> tag."""
134        htmltext = """<html>
135<head>
136</head>
137<body>
138<p>
139A paragraph is a section in a piece of writing, usually highlighting a
140particular point or topic. It always begins on a new line and usually
141with indentation, and it consists of at least one sentence.
142</p>
143</body>
144</html>
145"""
146        self.check_single(
147            htmltext,
148            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.",
149        )
150
151    def test_tag_p_with_linebreak_and_embedded_br(self):
152        """Test newlines within the <p> tag when there is an embedded <br> element."""
153        markup = "<p>First\nline.<br>Second\nline.</p>"
154        pofile = self.html2po(markup)
155        self.compareunit(pofile, 1, "First line.<br>Second line.")
156
157    def test_uppercase_html(self):
158        """Should ignore the casing of the html tags."""
159        self.check_single(
160            "<HTML><HEAD></HEAD><BODY><P>A paragraph.</P></BODY></HTML>", "A paragraph."
161        )
162
163    def test_tag_div(self):
164        """test that we can extract the <div> tag"""
165        self.check_single(
166            "<html><head></head><body><div>A paragraph.</div></body></html>",
167            "A paragraph.",
168        )
169        markup = "<div>First line.<br>Second line.</div>"
170        pofile = self.html2po(markup)
171        self.compareunit(pofile, 1, "First line.<br>Second line.")
172
173    def test_tag_div_with_linebreaks(self):
174        """Test linebreaks within a <div> tag."""
175        htmltext = """<html>
176<head>
177</head>
178<body>
179<div>
180A paragraph is a section in a piece of writing, usually highlighting a
181particular point or topic. It always begins on a new line and usually
182with indentation, and it consists of at least one sentence.
183</div>
184</body>
185</html>
186"""
187        self.check_single(
188            htmltext,
189            "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.",
190        )
191        markup = "<div>First\nline.<br>Second\nline.</div>"
192        pofile = self.html2po(markup)
193        self.compareunit(pofile, 1, "First line.<br>Second line.")
194
195    def test_tag_a(self):
196        """test that we can extract the <a> tag"""
197        self.check_single(
198            '<html><head></head><body><p>A paragraph with <a href="http://translate.org.za/">hyperlink</a>.</p></body></html>',
199            'A paragraph with <a href="http://translate.org.za/">hyperlink</a>.',
200        )
201
202    def test_tag_a_with_linebreak(self):
203        """Test that we can extract the <a> tag with newlines in it."""
204        htmltext = """<html>
205<head>
206</head>
207<body>
208<p>A
209paragraph
210with <a
211href="http://translate.org.za/">hyperlink</a>
212and
213newlines.</p></body></html>
214"""
215        self.check_single(
216            htmltext,
217            'A paragraph with <a href="http://translate.org.za/">hyperlink</a> and newlines.',
218        )
219
220    def test_sequence_of_anchor_elements(self):
221        """test that we can extract a sequence of anchor elements without mixing up start/end tags, issue #3768"""
222        self.check_single(
223            '<p><a href="http://example.com">This is a link</a> but this is not. <a href="http://example.com">However this is too</a></p>',
224            '<a href="http://example.com">This is a link</a> but this is not. <a href="http://example.com">However this is too</a>',
225        )
226
227    def test_tag_img(self):
228        """Test that we can extract the alt attribute from the <img> tag."""
229        self.check_single(
230            """<html><head></head><body><img src="picture.png" alt="A picture"></body></html>""",
231            "A picture",
232        )
233
234    def test_img_empty(self):
235        """Test that we can extract the alt attribute from the <img> tag."""
236        htmlsource = """<html><head></head><body><img src="images/topbar.jpg" width="750" height="80"></body></html>"""
237        self.check_null(htmlsource)
238
239    def test_tag_img_inside_a(self):
240        """Test that we can extract the alt attribute from the <img> tag when the img is embedded in a link."""
241        self.check_single(
242            """<html><head></head><body><p><a href="#"><img src="picture.png" alt="A picture" /></a></p></body></html>""",
243            "A picture",
244        )
245
246    def test_tag_table_summary(self):
247        """Test that we can extract the summary attribute."""
248        self.check_single(
249            """<html><head></head><body><table summary="Table summary"></table></body></html>""",
250            "Table summary",
251        )
252
253    def test_table_simple(self):
254        """Test that we can fully extract a simple table."""
255        markup = """<html><head></head><body><table><tr><th>Heading One</th><th>Heading Two</th></tr><tr><td>One</td><td>Two</td></tr></table></body></html>"""
256        pofile = self.html2po(markup)
257        self.countunits(pofile, 4)
258        self.compareunit(pofile, 1, "Heading One")
259        self.compareunit(pofile, 2, "Heading Two")
260        self.compareunit(pofile, 3, "One")
261        self.compareunit(pofile, 4, "Two")
262
263    def test_table_complex(self):
264        markup = """<table summary="This is the summary"><caption>A caption</caption><thead><tr><th abbr="Head 1">Heading One</th><th>Heading Two</th></tr></thead><tfoot><tr><td>Foot One</td><td>Foot Two</td></tr></tfoot><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>"""
265        pofile = self.html2po(markup)
266        self.countunits(pofile, 9)
267        self.compareunit(pofile, 1, "This is the summary")
268        self.compareunit(pofile, 2, "A caption")
269        self.compareunit(pofile, 3, "Head 1")
270        self.compareunit(pofile, 4, "Heading One")
271        self.compareunit(pofile, 5, "Heading Two")
272        self.compareunit(pofile, 6, "Foot One")
273        self.compareunit(pofile, 7, "Foot Two")
274        self.compareunit(pofile, 8, "One")
275        self.compareunit(pofile, 9, "Two")
276
277    def test_table_empty(self):
278        """Test that we ignore tables that are empty.
279
280        A table is deemed empty if it has no translatable content.
281        """
282
283        self.check_null(
284            """<html><head></head><body><table><tr><td><img src="bob.png"></td></tr></table></body></html>"""
285        )
286        self.check_null(
287            """<html><head></head><body><table><tr><td>&nbsp;</td></tr></table></body></html>"""
288        )
289        self.check_null(
290            """<html><head></head><body><table><tr><td><strong></strong></td></tr></table></body></html>"""
291        )
292
293    def test_address(self):
294        """Test to see if the address element is extracted"""
295        self.check_single("<body><address>My address</address></body>", "My address")
296
297    def test_headings(self):
298        """Test to see if the h* elements are extracted"""
299        markup = "<html><head></head><body><h1>Heading One</h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6></body></html>"
300        pofile = self.html2po(markup)
301        self.countunits(pofile, 6)
302        self.compareunit(pofile, 1, "Heading One")
303        self.compareunit(pofile, 2, "Heading Two")
304        self.compareunit(pofile, 3, "Heading Three")
305        self.compareunit(pofile, 4, "Heading Four")
306        self.compareunit(pofile, 5, "Heading Five")
307        self.compareunit(pofile, 6, "Heading Six")
308
309    def test_headings_with_linebreaks(self):
310        """Test to see if h* elements with newlines can be extracted"""
311        markup = "<html><head></head><body><h1>Heading\nOne</h1><h2>Heading\nTwo</h2><h3>Heading\nThree</h3><h4>Heading\nFour</h4><h5>Heading\nFive</h5><h6>Heading\nSix</h6></body></html>"
312        pofile = self.html2po(markup)
313        self.countunits(pofile, 6)
314        self.compareunit(pofile, 1, "Heading One")
315        self.compareunit(pofile, 2, "Heading Two")
316        self.compareunit(pofile, 3, "Heading Three")
317        self.compareunit(pofile, 4, "Heading Four")
318        self.compareunit(pofile, 5, "Heading Five")
319        self.compareunit(pofile, 6, "Heading Six")
320
321    def test_dt(self):
322        """Test to see if the definition list title (dt) element is extracted"""
323        self.check_single(
324            "<html><head></head><body><dl><dt>Definition List Item Title</dt></dl></body></html>",
325            "Definition List Item Title",
326        )
327
328    def test_dd(self):
329        """Test to see if the definition list description (dd) element is extracted"""
330        self.check_single(
331            "<html><head></head><body><dl><dd>Definition List Item Description</dd></dl></body></html>",
332            "Definition List Item Description",
333        )
334
335    def test_span(self):
336        """test to check that we don't double extract a span item"""
337        self.check_single(
338            "<html><head></head><body><p>You are a <span>Spanish</span> sentence.</p></body></html>",
339            "You are a <span>Spanish</span> sentence.",
340        )
341
342    def test_ul(self):
343        """Test to see if the list item <li> is extracted"""
344        markup = "<html><head></head><body><ul><li>Unordered One</li><li>Unordered Two</li></ul><ol><li>Ordered One</li><li>Ordered Two</li></ol></body></html>"
345        pofile = self.html2po(markup)
346        self.countunits(pofile, 4)
347        self.compareunit(pofile, 1, "Unordered One")
348        self.compareunit(pofile, 2, "Unordered Two")
349        self.compareunit(pofile, 3, "Ordered One")
350        self.compareunit(pofile, 4, "Ordered Two")
351
352    def test_nested_lists(self):
353        """Nested lists should be extracted correctly"""
354        markup = """<!DOCTYPE html><html><head><title>Nested lists</title></head><body>
355<ul>
356    <li>Vegetables</li>
357    <li>Fruit
358        <ul>
359            <li>Bananas</li>
360            <li>Apples</li>
361            <li>Pears</li>
362        </ul>
363        yeah, that should be enough
364    </li>
365    <li>Meat</li>
366</ul>
367</body></html>"""
368        pofile = self.html2po(markup)
369        self.countunits(pofile, 8)
370        self.compareunit(pofile, 1, "Nested lists")
371        self.compareunit(pofile, 2, "Vegetables")
372        self.compareunit(pofile, 3, "Fruit")
373        self.compareunit(pofile, 4, "Bananas")
374        self.compareunit(pofile, 5, "Apples")
375        self.compareunit(pofile, 6, "Pears")
376        self.compareunit(pofile, 7, "yeah, that should be enough")
377        self.compareunit(pofile, 8, "Meat")
378
379    def test_duplicates(self):
380        """check that we use the default style of msgctxt to disambiguate duplicate messages"""
381        markup = (
382            "<html><head></head><body><p>Duplicate</p><p>Duplicate</p></body></html>"
383        )
384        pofile = self.html2po(markup)
385        self.countunits(pofile, 2)
386        # FIXME change this so that we check that the msgctxt is correctly added
387        self.compareunit(pofile, 1, "Duplicate")
388        assert pofile.units[1].getlocations() == ["None+html.body.p:1-26"]
389        self.compareunit(pofile, 2, "Duplicate")
390        assert pofile.units[2].getlocations() == ["None+html.body.p:1-42"]
391
392    def test_multiline_reflow(self):
393        """check that we reflow multiline content to make it more readable for translators"""
394        self.check_single(
395            """<td valign="middle" width="96%"><font class="headingwhite">South
396                  Africa</font></td>""",
397            """South Africa""",
398        )
399
400    def test_nested_tags(self):
401        """check that we can extract items within nested tags"""
402        markup = "<div><p>Extract this</p>And this</div>"
403        pofile = self.html2po(markup)
404        self.countunits(pofile, 2)
405        self.compareunit(pofile, 1, "Extract this")
406        self.compareunit(pofile, 2, "And this")
407
408    def test_carriage_return(self):
409        """Remove carriage returns from files in dos format."""
410        htmlsource = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\r
411<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->\r
412<head>\r
413<!-- InstanceBeginEditable name="doctitle" -->\r
414<link href="fmfi.css" rel="stylesheet" type="text/css">\r
415</head>\r
416\r
417<body>\r
418<p>The rapid expansion of telecommunications infrastructure in recent\r
419years has helped to bridge the digital divide to a limited extent.</p> \r
420</body>\r
421<!-- InstanceEnd --></html>\r
422"""
423
424        self.check_single(
425            htmlsource,
426            "The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.",
427        )
428
429    def test_encoding_latin1(self):
430        """Convert HTML input in iso-8859-1 correctly to unicode."""
431        """Also verifies that the charset declaration isn't extracted as a translation unit."""
432        htmlsource = b"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
433<html><!-- InstanceBegin template="/Templates/masterpage.dwt" codeOutsideHTMLIsLocked="false" -->
434<head>
435<!-- InstanceBeginEditable name="doctitle" -->
436<title>FMFI - South Africa - CSIR Openphone - Overview</title>
437<!-- InstanceEndEditable -->
438<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
439<meta name="keywords" content="fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community">
440
441<!-- InstanceBeginEditable name="head" -->
442<!-- InstanceEndEditable -->
443<link href="../../../fmfi.css" rel="stylesheet" type="text/css">
444</head>
445
446<body>
447<p>We aim to please \x96 will you aim too, please?</p>
448<p>South Africa\x92s language diversity can be challenging.</p>
449</body>
450</html>
451"""
452        pofile = self.html2po(htmlsource)
453        self.countunits(pofile, 4)
454        self.compareunit(pofile, 1, "FMFI - South Africa - CSIR Openphone - Overview")
455        self.compareunit(
456            pofile,
457            2,
458            "fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community",
459        )
460        self.compareunit(pofile, 3, "We aim to please \x96 will you aim too, please?")
461        self.compareunit(
462            pofile, 4, "South Africa\x92s language diversity can be challenging."
463        )
464
465    def test_strip_html(self):
466        """Ensure that unnecessary html is stripped from the resulting unit."""
467
468        htmlsource = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
469<html>
470<head>
471<title>FMFI - Contact</title>
472</head>
473<body>
474<table width="100%"  border="0" cellpadding="0" cellspacing="0">
475  <tr align="left" valign="top">
476    <td width="150" height="556">
477      <table width="157" height="100%" border="0" cellspacing="0" id="leftmenubg-color">
478      <tr>
479          <td align="left" valign="top" height="555">
480            <table width="100%" border="0" cellspacing="0" cellpadding="2">
481              <tr align="left" valign="top" bgcolor="#660000">
482                <td width="4%"><strong></strong></td>
483                <td width="96%"><strong><font class="headingwhite">Projects</font></strong></td>
484              </tr>
485              <tr align="left" valign="top">
486                <td valign="middle" width="4%"><img src="images/arrow.gif" width="8" height="8"></td>
487                <td width="96%"><a href="index.html">Home Page</a></td>
488              </tr>
489            </table>
490          </td>
491      </tr>
492      </table>
493    </td>
494  </tr>
495</table>
496</body>
497</html>
498"""
499        pofile = self.html2po(htmlsource)
500        self.countunits(pofile, 3)
501        self.compareunit(pofile, 2, "Projects")
502        self.compareunit(pofile, 3, "Home Page")
503
504        # Translate and convert back:
505        pofile.units[2].target = "Projekte"
506        pofile.units[3].target = "Tuisblad"
507        htmlresult = (
508            self.po2html(bytes(pofile), htmlsource)
509            .replace("\n", " ")
510            .replace('= "', '="')
511            .replace("> <", "><")
512        )
513        snippet = '<td width="96%"><strong><font class="headingwhite">Projekte</font></strong></td>'
514        assert snippet in htmlresult
515        snippet = '<td width="96%"><a href="index.html">Tuisblad</a></td>'
516        assert snippet in htmlresult
517
518    def test_entityrefs_in_text(self):
519        """Should extract html entityrefs, preserving the ones representing reserved characters"""
520        """`See <https://developer.mozilla.org/en-US/docs/Glossary/Entity>`."""
521        self.check_single(
522            "<html><head></head><body><p>&lt;not an element&gt; &amp; &quot; &apos; &rsquo;</p></body></html>",
523            "&lt;not an element&gt; &amp; \" ' \u2019",
524        )
525
526    def test_entityrefs_in_attributes(self):
527        """Should convert html entityrefs in attribute values"""
528        # it would be even nicer if &quot; and &apos; could be preserved, but the automatic unescaping of
529        # attributes is deep inside html.HTMLParser.
530        self.check_single(
531            '<html><head></head><body><img alt="&lt;not an element&gt; &amp; &quot; &apos; &rsquo;"></body></html>',
532            "<not an element> & \" ' \u2019",
533        )
534
535    def test_charrefs(self):
536        """Should extract html charrefs"""
537        self.check_single(
538            "<html><head></head><body><p>&#8217; &#x2019;</p></body></html>",
539            "\u2019 \u2019",
540        )
541
542    def test_php(self):
543        """Test that PHP snippets don't interfere"""
544
545        # A simple string
546        self.check_phpsnippet("""<?=$phpvariable?>""")
547
548        # Contains HTML tag characters (< and >)
549        self.check_phpsnippet("""<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>""")
550
551        # Make sure basically any symbol can be handled
552        # NOTE quotation mark removed since it violates the HTML format when placed in an attribute
553        self.check_phpsnippet(
554            """<? asdfghjkl qwertyuiop 1234567890!@#$%^&*()-=_+[]\\{}|;':,./<>? ?>"""
555        )
556
557    def test_multiple_php(self):
558        """Test multiple PHP snippets in a string to make sure they get restored properly"""
559        php1 = """<?=$phpvariable?>"""
560        php2 = """<?=($a < $b ? $foo : ($b > c ? $bar : $cat))?>"""
561        php3 = """<? asdfghjklqwertyuiop1234567890!@#$%^&*()-=_+[]\\{}|;':",./<>? ?>"""
562
563        # Put 3 different strings into an html string
564        innertext = (
565            '<a href="'
566            + php1
567            + '/site.html">Body text</a> and some '
568            + php2
569            + " more text "
570            + php2
571            + php3
572        )
573        htmlsource = "<html><head></head><body><p>" + innertext + "</p></body></html>"
574        self.check_single(htmlsource, innertext)
575
576    def test_php_multiline(self):
577
578        # A multi-line php string to test
579        php1 = """<? abc
580def
581ghi ?>"""
582
583        # Scatter the php strings throughout the file, and show what the translation should be
584        innertext = (
585            '<a href="'
586            + php1
587            + '/site.html">Body text</a> and some '
588            + php1
589            + " more text "
590            + php1
591            + php1
592        )
593        innertrans = (
594            '<a href="'
595            + php1
596            + '/site.html">Texte de corps</a> et encore de '
597            + php1
598            + " plus de texte "
599            + php1
600            + php1
601        )
602
603        htmlsource = (
604            "<html><head></head><body><p>" + innertext + "</p></body></html>"
605        )  # Current html file
606        transsource = (
607            "<html><head></head><body><p>" + innertrans + "</p></body></html>"
608        )  # Expected translation
609
610        pofile = self.html2po(htmlsource)
611        pofile.units[1].target = innertrans  # Register the translation in the PO file
612        htmlresult = self.po2html(pofile, htmlsource)
613        assert htmlresult == transsource
614
615    def test_php_with_embedded_html(self):
616        """Should not consume HTML within processing instructions"""
617        self.check_single(
618            "<html><head></head><body><p>a <? <p>b</p> ?> c</p></body></html>",
619            "a <? <p>b</p> ?> c",
620        )
621
622    def test_comments(self):
623        """Test that HTML comments are converted to translator notes in output"""
624        pofile = self.html2po(
625            "<!-- comment outside block --><p><!-- a comment -->A paragraph<!-- with another comment -->.</p>",
626            keepcomments=True,
627        )
628        self.compareunit(pofile, 1, "A paragraph.")
629        notes = pofile.getunits()[-1].getnotes()
630        assert str(notes) == " a comment \n with another comment "
631
632    def test_attribute_without_value(self):
633        htmlsource = """<ul>
634                <li><a href="logoColor.eps" download>EPS färg</a></li>
635            </ul>
636"""
637        pofile = self.html2po(htmlsource)
638        self.compareunit(pofile, 1, "EPS färg")
639
640
class TestHTML2POCommand(test_convert.TestConvertCommand, TestHTML2PO):
    """Run actual html2po commands on files and check the results."""

    convertmodule = html2po
    defaultoptions = {"progress": "none"}

    def test_multifile_single(self):
        """--multifile=single produces one pot file per input file."""
        inputs = (
            ("file1.html", "<div>You are only coming through in waves</div>"),
            ("file2.html", "<div>Your lips move but I cannot hear what you say</div>"),
        )
        for filename, markup in inputs:
            self.create_testfile(filename, markup)
        self.run_command("./", "pots", pot=True, multifile="single")
        for potname in ("pots/file1.pot", "pots/file2.pot"):
            assert os.path.isfile(self.get_testfilename(potname))
        # Only the text of file1.html may appear in its pot file.
        first_pot = str(self.read_testfile("pots/file1.pot"))
        assert "coming through" in first_pot
        assert "cannot hear" not in first_pot

    def test_multifile_onefile(self):
        """--multifile=onefile produces a single file, not a directory."""
        inputs = (
            ("file1.html", "<div>You are only coming through in waves</div>"),
            ("file2.html", "<div>Your lips move but I cannot hear what you say</div>"),
        )
        for filename, markup in inputs:
            self.create_testfile(filename, markup)
        self.run_command("./", "one.pot", pot=True, multifile="onefile")
        assert os.path.isfile(self.get_testfilename("one.pot"))
        combined = str(self.read_testfile("one.pot"))
        for fragment in ("coming through", "cannot hear"):
            assert fragment in combined

    def test_multifile_onefile_to_stdout(self, capsys):
        """--multifile=onefile without an output file writes to stdout."""
        inputs = (
            ("file1.html", "<div>You are only coming through in waves</div>"),
            ("file2.html", "<div>Your lips move but I cannot hear what you say</div>"),
        )
        for filename, markup in inputs:
            self.create_testfile(filename, markup)
        self.run_command("./", pot=True, multifile="onefile")
        captured_out, captured_err = capsys.readouterr()
        for fragment in ("coming through", "cannot hear"):
            assert fragment in captured_out
        assert captured_err == ""

    def test_help(self, capsys):
        """The help output lists the html2po-specific options."""
        options = super().test_help(capsys)
        for flag in ("-P, --pot", "--duplicates=DUPLICATESTYLE", "--keepcomments"):
            options = self.help_check(options, flag)
        self.help_check(options, "--multifile=MULTIFILESTYLE", last=True)
697