1{-# LANGUAGE OverloadedStrings #-}
2{- |
3   Module      : Tests.Readers.HTML
4   Copyright   : © 2006-2021 John MacFarlane
5   License     : GNU GPL, version 2 or above
6
7   Maintainer  : John MacFarlane <jgm@berkeley.edu>
8   Stability   : alpha
9   Portability : portable
10
11Tests for the HTML reader.
12-}
13module Tests.Readers.HTML (tests) where
14
15import Data.Text (Text)
16import qualified Data.Text as T
17import Test.Tasty
18import Test.Tasty.QuickCheck
19import Test.Tasty.Options (IsOption(defaultValue))
20import Tests.Helpers
21import Text.Pandoc
22import Text.Pandoc.Shared (isHeaderBlock)
23import Text.Pandoc.Arbitrary ()
24import Text.Pandoc.Builder
25import Text.Pandoc.Walk (walk)
26
27html :: Text -> Pandoc
28html = purely $ readHtml def
29
30htmlNativeDivs :: Text -> Pandoc
31htmlNativeDivs = purely $ readHtml def { readerExtensions = enableExtension Ext_native_divs $ readerExtensions def }
32
33makeRoundTrip :: Block -> Block
34makeRoundTrip CodeBlock{} = Para [Str "code block was here"]
35makeRoundTrip LineBlock{} = Para [Str "line block was here"]
36makeRoundTrip RawBlock{} = Para [Str "raw block was here"]
37makeRoundTrip (Div attr bs) = Div attr $ filter (not . isHeaderBlock) bs
38-- avoids round-trip failures related to makeSections
39-- e.g. with [Div ("loc",[],[("a","11"),("b_2","a b c")]) [Header 3 ("",[],[]) []]]
40makeRoundTrip Table{} = Para [Str "table block was here"]
41makeRoundTrip x           = x
42
43removeRawInlines :: Inline -> Inline
44removeRawInlines RawInline{} = Str "raw inline was here"
45removeRawInlines x           = x
46
47roundTrip :: Blocks -> Bool
48roundTrip b = d'' == d'''
49  where d = walk removeRawInlines $
50            walk makeRoundTrip $ Pandoc nullMeta $ toList b
51        d' = rewrite d
52        d'' = rewrite d'
53        d''' = rewrite d''
54        rewrite = html . (`T.snoc` '\n') .
55                  purely (writeHtml5String def
56                            { writerWrapText = WrapPreserve })
57
58tests :: [TestTree]
59tests = [ testGroup "base tag"
60          [ test html "simple" $
61            "<head><base href=\"http://www.w3schools.com/images/foo\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
62            plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
63          , test html "slash at end of base" $
64            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?>
65            plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman"))
66          , test html "slash at beginning of href" $
67            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"/stickman.gif\" alt=\"Stickman\"></head>" =?>
68            plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman"))
69          , test html "absolute URL" $
70            "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?>
71            plain (image "http://example.com/stickman.gif" "" (text "Stickman"))
72          ]
73        , testGroup "anchors"
74          [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
75            plain (spanWith ("anchor",[],[]) mempty)
76          ]
77        , testGroup "img"
78          [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?>
79            plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "")
80          , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?>
81            plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "")
82          ]
83        , testGroup "lang"
84          [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
85            setMeta "lang" (text "es") (doc (plain (text "hola")))
86          , test html "xml:lang on <html>" $ "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"es\"><head></head><body>hola</body></html>" =?>
87            setMeta "lang" (text "es") (doc (plain (text "hola")))
88          ]
89        , testGroup "main"
90          [ test htmlNativeDivs "<main> becomes <div role=main>" $ "<main>hello</main>" =?>
91            doc (divWith ("", [], [("role", "main")]) (plain (text "hello")))
92          , test htmlNativeDivs "<main role=X> becomes <div role=X>" $ "<main role=foobar>hello</main>" =?>
93            doc (divWith ("", [], [("role", "foobar")]) (plain (text "hello")))
94          , test htmlNativeDivs "<main> has attributes preserved" $ "<main id=foo class=bar data-baz=qux>hello</main>" =?>
95            doc (divWith ("foo", ["bar"], [("role", "main"), ("baz", "qux")]) (plain (text "hello")))
96          , test htmlNativeDivs "<main> closes <p>" $ "<p>hello<main>main content</main>" =?>
97            doc (para (text "hello") <> divWith ("", [], [("role", "main")]) (plain (text "main content")))
98          , test htmlNativeDivs "<main> followed by text" $ "<main>main content</main>non-main content" =?>
99            doc (divWith ("", [], [("role", "main")]) (plain (text "main content")) <> plain (text "non-main content"))
100          ]
101        , testGroup "samp"
102          [
103            test html "inline samp block" $
104            "<samp>Answer is 42</samp>" =?>
105            plain (codeWith ("",["sample"],[]) "Answer is 42")
106          ]
107        , testGroup "var"
108          [ test html "inline var block" $
109            "<var>result</var>" =?>
110            plain (codeWith ("",["variable"],[]) "result")
111          ]
112        , testGroup "header"
113          [ test htmlNativeDivs "<header> is parsed as a div" $
114            "<header id=\"title\">Title</header>" =?>
115            divWith ("title", mempty, mempty) (plain "Title")
116          ]
117        , testGroup "code block"
118          [ test html "attributes in pre > code element" $
119            "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>"
120            =?>
121            codeBlockWith ("a", ["python"], []) "print('hi')"
122
123          , test html "attributes in pre take precendence" $
124            "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>"
125            =?>
126            codeBlockWith ("c", [], []) "print('hi mom!')"
127          ]
128        , askOption $ \(QuickCheckTests numtests) ->
129            testProperty "Round trip" $
130              withMaxSuccess (if QuickCheckTests numtests == defaultValue
131                                 then 25
132                                 else numtests) roundTrip
133        ]
134