A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.

tag""" self.check_single( "

A paragraph.

", "A paragraph.", ) markup = "

First line.
Second line.

" pofile = self.html2po(markup) self.compareunit(pofile, 1, "First line.
Second line.") def test_tag_div_with_linebreaks(self): """Test linebreaks within a

<div> tag. NOTE(review): htmltext below is blank although a full paragraph is expected, so the fixture markup looks stripped -- confirm against the original file.""" htmltext = """

""" self.check_single( htmltext, "A paragraph is a section in a piece of writing, usually highlighting a particular point or topic. It always begins on a new line and usually with indentation, and it consists of at least one sentence.", ) markup = "

First\nline.
Second\nline.

" pofile = self.html2po(markup) self.compareunit(pofile, 1, "First line.
Second line.") def test_tag_a(self): """Test that we can extract the <a> tag.""" self.check_single( '

A paragraph with hyperlink.

', 'A paragraph with hyperlink.', ) def test_tag_a_with_linebreak(self): """Test that we can extract the <a> tag with newlines in it.""" htmltext = """

A paragraph with hyperlink and newlines.

""" self.check_single( htmltext, 'A paragraph with hyperlink and newlines.', ) def test_sequence_of_anchor_elements(self): """Test that a sequence of anchor elements is extracted without mixing up start/end tags (issue #3768).""" self.check_single( '

This is a link but this is not. However this is too

', 'This is a link but this is not. However this is too', ) def test_tag_img(self): """Test that we can extract the alt attribute from the <img> tag.""" self.check_single( """

""", "A picture", ) def test_img_empty(self): """Test that this <img> source passes check_null (presumably: nothing is extracted -- confirm against the helper).""" htmlsource = """

""" self.check_null(htmlsource) def test_tag_img_inside_a(self): """Test that we can extract the alt attribute from the <img> tag when the img is embedded in a link.""" self.check_single( """

""", "A picture", ) def test_tag_table_summary(self): """Test that we can extract the summary attribute of a table.""" self.check_single( """

""", "Table summary", ) def test_table_simple(self): """Test that we can fully extract a simple table: four units are expected, one per cell text.""" markup = """

Heading One	Heading Two
One	Two

""" pofile = self.html2po(markup) self.countunits(pofile, 4) self.compareunit(pofile, 1, "Heading One") self.compareunit(pofile, 2, "Heading Two") self.compareunit(pofile, 3, "One") self.compareunit(pofile, 4, "Two") def test_table_complex(self): """Test a larger table: nine units are expected, including the summary, the caption, the headings and the footer cells.""" markup = """

A caption
Heading One	Heading Two
Foot One	Foot Two
One	Two

""" pofile = self.html2po(markup) self.countunits(pofile, 9) self.compareunit(pofile, 1, "This is the summary") self.compareunit(pofile, 2, "A caption") self.compareunit(pofile, 3, "Head 1") self.compareunit(pofile, 4, "Heading One") self.compareunit(pofile, 5, "Heading Two") self.compareunit(pofile, 6, "Foot One") self.compareunit(pofile, 7, "Foot Two") self.compareunit(pofile, 8, "One") self.compareunit(pofile, 9, "Two") def test_table_empty(self): """Test that we ignore tables that are empty. A table is deemed empty if it has no translatable content; two such sources are passed to check_null.""" self.check_null( """

""" ) self.check_null( """

""" ) def test_address(self): """Test that the address element is extracted.""" self.check_single("

My address

", "My address") def test_headings(self): """Test that the h* elements are extracted: six headings, six units.""" markup = "

Heading One

Heading Two

Heading Three

Heading Four

Heading Five

Heading Six

" pofile = self.html2po(markup) self.countunits(pofile, 6) self.compareunit(pofile, 1, "Heading One") self.compareunit(pofile, 2, "Heading Two") self.compareunit(pofile, 3, "Heading Three") self.compareunit(pofile, 4, "Heading Four") self.compareunit(pofile, 5, "Heading Five") self.compareunit(pofile, 6, "Heading Six") def test_headings_with_linebreaks(self): """Test that h* elements containing newlines are extracted, each newline coming back as a space in the unit.""" markup = "

Heading\nOne

Heading\nTwo

Heading\nThree

Heading\nFour

Heading\nFive

Heading\nSix

" pofile = self.html2po(markup) self.countunits(pofile, 6) self.compareunit(pofile, 1, "Heading One") self.compareunit(pofile, 2, "Heading Two") self.compareunit(pofile, 3, "Heading Three") self.compareunit(pofile, 4, "Heading Four") self.compareunit(pofile, 5, "Heading Five") self.compareunit(pofile, 6, "Heading Six") def test_dt(self): """Test that the definition list title (dt) element is extracted.""" self.check_single( "

Definition List Item Title

", "Definition List Item Title", ) def test_dd(self): """Test that the definition list description (dd) element is extracted.""" self.check_single( "

Definition List Item Description

", "Definition List Item Description", ) def test_span(self): """Test that we don't double extract a span item.""" self.check_single( "

You are a Spanish sentence.

", "You are a Spanish sentence.", ) def test_ul(self): """Test that the list item

is extracted, for the unordered and the ordered list alike""" markup = "

Unordered One
Unordered Two

Ordered One
Ordered Two

" pofile = self.html2po(markup) self.countunits(pofile, 4) self.compareunit(pofile, 1, "Unordered One") self.compareunit(pofile, 2, "Unordered Two") self.compareunit(pofile, 3, "Ordered One") self.compareunit(pofile, 4, "Ordered Two") def test_nested_lists(self): """Test that nested lists are extracted item by item, in the order they appear (eight units).""" markup = """Nested lists

Vegetables
Fruit
- Bananas
- Apples
- Pears
yeah, that should be enough
Meat

""" pofile = self.html2po(markup) self.countunits(pofile, 8) self.compareunit(pofile, 1, "Nested lists") self.compareunit(pofile, 2, "Vegetables") self.compareunit(pofile, 3, "Fruit") self.compareunit(pofile, 4, "Bananas") self.compareunit(pofile, 5, "Apples") self.compareunit(pofile, 6, "Pears") self.compareunit(pofile, 7, "yeah, that should be enough") self.compareunit(pofile, 8, "Meat") def test_duplicates(self): """Check that duplicate messages are kept as two units told apart by their locations; the msgctxt itself is not asserted yet (see the FIXME).""" markup = ( "

Duplicate

" ) pofile = self.html2po(markup) self.countunits(pofile, 2) # FIXME change this so that we check that the msgctxt is correctly added self.compareunit(pofile, 1, "Duplicate") assert pofile.units[1].getlocations() == ["None+html.body.p:1-26"] self.compareunit(pofile, 2, "Duplicate") assert pofile.units[2].getlocations() == ["None+html.body.p:1-42"] def test_multiline_reflow(self): """Check that we reflow multiline content to make it more readable for translators.""" self.check_single( """South Africa""", """South Africa""", ) def test_nested_tags(self): """Check that we can extract items within nested tags (two units are expected).""" markup = "

Extract this

And this

" pofile = self.html2po(markup) self.countunits(pofile, 2) self.compareunit(pofile, 1, "Extract this") self.compareunit(pofile, 2, "And this") def test_carriage_return(self): """Test that carriage returns from files in DOS format are removed from the extracted unit.""" htmlsource = """\r \r \r \r \r \r \r \r

The rapid expansion of telecommunications infrastructure in recent\r years has helped to bridge the digital divide to a limited extent.

\r \r \r """ self.check_single( htmlsource, "The rapid expansion of telecommunications infrastructure in recent years has helped to bridge the digital divide to a limited extent.", ) def test_encoding_latin1(self): """Test that HTML input in iso-8859-1 is converted correctly to unicode (four units are expected).""" """Also verifies that the charset declaration isn't extracted as a translation unit.""" htmlsource = b""" FMFI - South Africa - CSIR Openphone - Overview

We aim to please \x96 will you aim too, please?

South Africa\x92s language diversity can be challenging.

""" pofile = self.html2po(htmlsource) self.countunits(pofile, 4) self.compareunit(pofile, 1, "FMFI - South Africa - CSIR Openphone - Overview") self.compareunit( pofile, 2, "fmfi, first mile, first inch, wireless, rural development, access devices, mobile devices, wifi, connectivity, rural connectivty, ict, low cost, cheap, digital divide, csir, idrc, community", ) self.compareunit(pofile, 3, "We aim to please \x96 will you aim too, please?") self.compareunit( pofile, 4, "South Africa\x92s language diversity can be challenging." ) def test_strip_html(self): """Ensure that unnecessary html is stripped from the resulting unit, and that the translated units convert back through po2html.""" htmlsource = """ FMFI - Contact

	Projects
	Home Page

""" pofile = self.html2po(htmlsource) self.countunits(pofile, 3) self.compareunit(pofile, 2, "Projects") self.compareunit(pofile, 3, "Home Page") # Translate and convert back: pofile.units[2].target = "Projekte" pofile.units[3].target = "Tuisblad" htmlresult = ( self.po2html(bytes(pofile), htmlsource) .replace("\n", " ") .replace('= "', '="') .replace("> <", "><") ) snippet = 'Projekte' assert snippet in htmlresult snippet = 'Tuisblad' assert snippet in htmlresult def test_entityrefs_in_text(self): """Test that html entityrefs in text are extracted, preserving the ones representing reserved characters.""" """`See `.""" self.check_single( "

<not an element> & " ' ’

", "<not an element> & \" ' \u2019", ) def test_entityrefs_in_attributes(self): """Test that html entityrefs in attribute values are converted.""" # it would be even nicer if " and ' could be preserved, but the automatic unescaping of # attributes is deep inside html.HTMLParser. self.check_single( ' <not an element> & " ' ’

', " & \" ' \u2019", ) def test_charrefs(self): """Test that html charrefs are extracted (U+2019 is expected twice in the unit).""" self.check_single( "

’ ’

", "\u2019 \u2019", ) def test_php(self): """Test that PHP snippets don't interfere (three snippets are passed to check_phpsnippet).""" # A simple string self.check_phpsnippet("""""") # Contains HTML tag characters (< and >) self.check_phpsnippet(""" c ? $bar : $cat))?>""") # Make sure basically any symbol can be handled # NOTE quotation mark removed since it violates the HTML format when placed in an attribute self.check_phpsnippet( """? ?>""" ) def test_multiple_php(self): """Test multiple PHP snippets in a string to make sure they get restored properly. NOTE(review): php1 is assigned but not used in innertext as shown -- confirm whether part of the literal is missing.""" php1 = """""" php2 = """ c ? $bar : $cat))?>""" php3 = """? ?>""" # Put 3 different strings into an html string innertext = ( 'Body text and some ' + php2 + " more text " + php2 + php3 ) htmlsource = "

" + innertext + "

" self.check_single(htmlsource, innertext) def test_php_multiline(self): """Test that a translated unit containing PHP snippets converts back through po2html to the expected translated source.""" # A multi-line php string to test php1 = """""" # Scatter the php strings throughout the file, and show what the translation should be innertext = ( 'Body text and some ' + php1 + " more text " + php1 + php1 ) innertrans = ( 'Texte de corps et encore de ' + php1 + " plus de texte " + php1 + php1 ) htmlsource = ( "

" + innertext + "

" ) # Current html file transsource = ( "

" + innertrans + "

" ) # Expected translation pofile = self.html2po(htmlsource) pofile.units[1].target = innertrans # Register the translation in the PO file htmlresult = self.po2html(pofile, htmlsource) assert htmlresult == transsource def test_php_with_embedded_html(self): """Test that HTML within processing instructions is not consumed.""" self.check_single( "

a b

?> c

", "a b

?> c", ) def test_comments(self): """Test that HTML comments are converted to translator notes in the output when keepcomments=True is passed.""" pofile = self.html2po( "

A paragraph.

", keepcomments=True, ) self.compareunit(pofile, 1, "A paragraph.") notes = pofile.getunits()[-1].getnotes() assert str(notes) == " a comment \n with another comment " def test_attribute_without_value(self): """Test that the unit text is extracted; going by the test name the source carries an attribute without a value (TODO confirm, the markup is not visible here).""" htmlsource = """

EPS färg

""" pofile = self.html2po(htmlsource) self.compareunit(pofile, 1, "EPS färg") class TestHTML2POCommand(test_convert.TestConvertCommand, TestHTML2PO): """Tests running actual html2po commands on files; the class also inherits from TestHTML2PO.""" convertmodule = html2po defaultoptions = {"progress": "none"} def test_multifile_single(self): """Test that --multifile=single produces one pot file per input file, with file1.pot holding only the text of file1.html.""" self.create_testfile( "file1.html", "

You are only coming through in waves

" ) self.create_testfile( "file2.html", "

Your lips move but I cannot hear what you say

" ) self.run_command("./", "pots", pot=True, multifile="single") assert os.path.isfile(self.get_testfilename("pots/file1.pot")) assert os.path.isfile(self.get_testfilename("pots/file2.pot")) content = str(self.read_testfile("pots/file1.pot")) assert "coming through" in content assert "cannot hear" not in content def test_multifile_onefile(self): """Test that --multifile=onefile produces a file, not a directory, holding the text of both input files.""" self.create_testfile( "file1.html", "

You are only coming through in waves

" ) self.create_testfile( "file2.html", "

Your lips move but I cannot hear what you say

" ) self.run_command("./", "one.pot", pot=True, multifile="onefile") assert os.path.isfile(self.get_testfilename("one.pot")) content = str(self.read_testfile("one.pot")) assert "coming through" in content assert "cannot hear" in content def test_multifile_onefile_to_stdout(self, capsys): """Test --multifile=onefile without an output file: the text of both inputs is captured on stdout and stderr stays empty.""" self.create_testfile( "file1.html", "

You are only coming through in waves

" ) self.create_testfile( "file2.html", "

Your lips move but I cannot hear what you say

" ) self.run_command("./", pot=True, multifile="onefile") content, err = capsys.readouterr() assert "coming through" in content assert "cannot hear" in content assert err == "" def test_help(self, capsys): """Test that help lists the --pot, --duplicates, --keepcomments and --multifile options.""" options = super().test_help(capsys) options = self.help_check(options, "-P, --pot") options = self.help_check(options, "--duplicates=DUPLICATESTYLE") options = self.help_check(options, "--keepcomments") options = self.help_check(options, "--multifile=MULTIFILESTYLE", last=True)