1{-# LANGUAGE OverloadedStrings #-} 2{- | 3 Module : Tests.Readers.HTML 4 Copyright : © 2006-2021 John MacFarlane 5 License : GNU GPL, version 2 or above 6 7 Maintainer : John MacFarlane <jgm@berkeley.edu> 8 Stability : alpha 9 Portability : portable 10 11Tests for the HTML reader. 12-} 13module Tests.Readers.HTML (tests) where 14 15import Data.Text (Text) 16import qualified Data.Text as T 17import Test.Tasty 18import Test.Tasty.QuickCheck 19import Test.Tasty.Options (IsOption(defaultValue)) 20import Tests.Helpers 21import Text.Pandoc 22import Text.Pandoc.Shared (isHeaderBlock) 23import Text.Pandoc.Arbitrary () 24import Text.Pandoc.Builder 25import Text.Pandoc.Walk (walk) 26 27html :: Text -> Pandoc 28html = purely $ readHtml def 29 30htmlNativeDivs :: Text -> Pandoc 31htmlNativeDivs = purely $ readHtml def { readerExtensions = enableExtension Ext_native_divs $ readerExtensions def } 32 33makeRoundTrip :: Block -> Block 34makeRoundTrip CodeBlock{} = Para [Str "code block was here"] 35makeRoundTrip LineBlock{} = Para [Str "line block was here"] 36makeRoundTrip RawBlock{} = Para [Str "raw block was here"] 37makeRoundTrip (Div attr bs) = Div attr $ filter (not . isHeaderBlock) bs 38-- avoids round-trip failures related to makeSections 39-- e.g. with [Div ("loc",[],[("a","11"),("b_2","a b c")]) [Header 3 ("",[],[]) []]] 40makeRoundTrip Table{} = Para [Str "table block was here"] 41makeRoundTrip x = x 42 43removeRawInlines :: Inline -> Inline 44removeRawInlines RawInline{} = Str "raw inline was here" 45removeRawInlines x = x 46 47roundTrip :: Blocks -> Bool 48roundTrip b = d'' == d''' 49 where d = walk removeRawInlines $ 50 walk makeRoundTrip $ Pandoc nullMeta $ toList b 51 d' = rewrite d 52 d'' = rewrite d' 53 d''' = rewrite d'' 54 rewrite = html . (`T.snoc` '\n') . 55 purely (writeHtml5String def 56 { writerWrapText = WrapPreserve }) 57 58tests :: [TestTree] 59tests = [ testGroup "base tag" 60 [ test html "simple" $ 61 "<head><base href=\"http://www.w3schools.com/images/foo\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> 62 plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) 63 , test html "slash at end of base" $ 64 "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"stickman.gif\" alt=\"Stickman\"></head>" =?> 65 plain (image "http://www.w3schools.com/images/stickman.gif" "" (text "Stickman")) 66 , test html "slash at beginning of href" $ 67 "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"/stickman.gif\" alt=\"Stickman\"></head>" =?> 68 plain (image "http://www.w3schools.com/stickman.gif" "" (text "Stickman")) 69 , test html "absolute URL" $ 70 "<head><base href=\"http://www.w3schools.com/images/\" ></head><body><img src=\"http://example.com/stickman.gif\" alt=\"Stickman\"></head>" =?> 71 plain (image "http://example.com/stickman.gif" "" (text "Stickman")) 72 ] 73 , testGroup "anchors" 74 [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?> 75 plain (spanWith ("anchor",[],[]) mempty) 76 ] 77 , testGroup "img" 78 [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?> 79 plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "") 80 , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?> 81 plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "") 82 ] 83 , testGroup "lang" 84 [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?> 85 setMeta "lang" (text "es") (doc (plain (text "hola"))) 86 , test html "xml:lang on <html>" $ "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"es\"><head></head><body>hola</body></html>" =?> 87 setMeta "lang" (text "es") (doc (plain (text "hola"))) 88 ] 89 , testGroup "main" 90 [ test htmlNativeDivs "<main> becomes <div role=main>" $ "<main>hello</main>" =?> 91 doc (divWith ("", [], [("role", "main")]) (plain (text "hello"))) 92 , test htmlNativeDivs "<main role=X> becomes <div role=X>" $ "<main role=foobar>hello</main>" =?> 93 doc (divWith ("", [], [("role", "foobar")]) (plain (text "hello"))) 94 , test htmlNativeDivs "<main> has attributes preserved" $ "<main id=foo class=bar data-baz=qux>hello</main>" =?> 95 doc (divWith ("foo", ["bar"], [("role", "main"), ("baz", "qux")]) (plain (text "hello"))) 96 , test htmlNativeDivs "<main> closes <p>" $ "<p>hello<main>main content</main>" =?> 97 doc (para (text "hello") <> divWith ("", [], [("role", "main")]) (plain (text "main content"))) 98 , test htmlNativeDivs "<main> followed by text" $ "<main>main content</main>non-main content" =?> 99 doc (divWith ("", [], [("role", "main")]) (plain (text "main content")) <> plain (text "non-main content")) 100 ] 101 , testGroup "samp" 102 [ 103 test html "inline samp block" $ 104 "<samp>Answer is 42</samp>" =?> 105 plain (codeWith ("",["sample"],[]) "Answer is 42") 106 ] 107 , testGroup "var" 108 [ test html "inline var block" $ 109 "<var>result</var>" =?> 110 plain (codeWith ("",["variable"],[]) "result") 111 ] 112 , testGroup "header" 113 [ test htmlNativeDivs "<header> is parsed as a div" $ 114 "<header id=\"title\">Title</header>" =?> 115 divWith ("title", mempty, mempty) (plain "Title") 116 ] 117 , testGroup "code block" 118 [ test html "attributes in pre > code element" $ 119 "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>" 120 =?> 121 codeBlockWith ("a", ["python"], []) "print('hi')" 122 123 , test html "attributes in pre take precendence" $ 124 "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>" 125 =?> 126 codeBlockWith ("c", [], []) "print('hi mom!')" 127 ] 128 , askOption $ \(QuickCheckTests numtests) -> 129 testProperty "Round trip" $ 130 withMaxSuccess (if QuickCheckTests numtests == defaultValue 131 then 25 132 else numtests) roundTrip 133 ] 134