defmodule HtmlSanitizeEx.Scrubber.Meta do
  @moduledoc """
  This module contains some meta-programming magic to define your own rules
  for scrubbers.

  The StripTags scrubber is a good starting point:

      defmodule HtmlSanitizeEx.Scrubber.StripTags do
        require HtmlSanitizeEx.Scrubber.Meta
        alias HtmlSanitizeEx.Scrubber.Meta

        # Removes any CDATA tags before the traverser/scrubber runs.
        Meta.remove_cdata_sections_before_scrub

        Meta.strip_comments

        Meta.strip_everything_not_covered
      end

  You can use the `allow_tag_with_uri_attributes/3` and
  `allow_tag_with_these_attributes/2` macros to define what is allowed:

      defmodule HtmlSanitizeEx.Scrubber.StripTags do
        require HtmlSanitizeEx.Scrubber.Meta
        alias HtmlSanitizeEx.Scrubber.Meta

        # Removes any CDATA tags before the traverser/scrubber runs.
        Meta.remove_cdata_sections_before_scrub

        Meta.strip_comments

        Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
        Meta.allow_tag_with_these_attributes "img", ["width", "height"]

        Meta.strip_everything_not_covered
      end

  You can stack these if convenient:

      Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
      Meta.allow_tag_with_these_attributes "img", ["width", "height"]
      Meta.allow_tag_with_these_attributes "img", ["title", "alt"]

  The macros that take a list of tags or attributes enumerate that list at
  compile time, so it has to be written as a literal list at the call site.
  """

  @doc """
  Allow these tags and use the regular `scrub_attribute/2` function to scrub
  the attributes.

  For every tag name in `list` this defines a `scrub/1` clause that keeps the
  tag and passes each of its attributes through `scrub_attribute/2`.
  """
  defmacro allow_tags_and_scrub_their_attributes(list) do
    Enum.map(list, &allow_this_tag_and_scrub_its_attributes/1)
  end

  @doc """
  Allow the given `list` of attributes for the specified `tag_name`.

      Meta.allow_tag_with_these_attributes "a", ["name", "title"]

      Meta.allow_tag_with_these_attributes "img", ["title", "alt"]

  With the default empty `list` the tag itself is allowed and its attributes
  are scrubbed by whatever `scrub_attribute/2` clauses exist.
  """
  defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
    attribute_clauses =
      Enum.map(list, &allow_this_tag_with_this_attribute(tag_name, &1))

    # The attribute clauses are emitted first, followed by the clause that
    # allows the tag itself.
    attribute_clauses ++ [allow_this_tag_and_scrub_its_attributes(tag_name)]
  end

  @doc """
  Allow the given list of `values` for the given `attribute` on the
  specified `tag_name`.

      Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
  """
  defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
    quote do
      def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
          when value in unquote(values) do
        {unquote(attribute), value}
      end
    end
  end

  @doc """
  Allow the given `list` of attributes to contain URI information for the
  specified `tag`.

      # Only allow SSL-enabled and mailto links
      Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]

      # Only allow non-SSL images
      Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]

  A URI without any protocol separator is kept as-is. A URI with a separator
  is kept only when its scheme is one of `valid_schemes`; otherwise the
  attribute is dropped.
  """
  defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
    Enum.map(list, &allow_tag_with_uri_attribute(tag, &1, valid_schemes))
  end

  @doc """
  Allow the `style` attribute on each of the tags in `list`.

  The generated `scrub_attribute/2` clauses pass the attribute value through
  `scrub_css/1`, which is not defined by this module and therefore has to be
  available in the module that invokes this macro.

      Meta.allow_tags_with_style_attributes ["p", "span"]
  """
  defmacro allow_tags_with_style_attributes(list) do
    Enum.map(list, &allow_this_tag_with_style_attribute/1)
  end

  @doc """
  Removes any CDATA tags before the traverser/scrubber runs.

  Defines `before_scrub/1`, which deletes every opening `<![CDATA[` marker
  from the given HTML string. The closing `]]>` marker is not touched here.
  """
  defmacro remove_cdata_sections_before_scrub do
    quote do
      def before_scrub(html), do: String.replace(html, "<![CDATA[", "")
    end
  end

  @doc """
  Strips all comments.

  Defines a `scrub/1` clause that replaces any `{:comment, _}` node with an
  empty string.
  """
  defmacro strip_comments do
    quote do
      def scrub({:comment, _children}), do: ""
    end
  end

  @doc """
  Ensures any tags/attributes not explicitly whitelisted until this
  statement are stripped.

  This defines the catch-all clauses of `scrub_attribute/2` and `scrub/1`, so
  it has to come after all the `allow_*` macro calls.
  """
  defmacro strip_everything_not_covered do
    # Evaluated when the macro expands; the interpolation turns each
    # placeholder into a binary that can be used in a binary pattern.
    replacement_linebreak = "#{HtmlSanitizeEx.Parser.replacement_for_linebreak()}"
    replacement_space = "#{HtmlSanitizeEx.Parser.replacement_for_space()}"
    replacement_tab = "#{HtmlSanitizeEx.Parser.replacement_for_tab()}"

    quote do
      # If we haven't covered the attribute until here, we just scrub it.
      def scrub_attribute(_tag, _attribute), do: nil

      # If we haven't covered the tag until here, we drop the tag itself and
      # keep only its children.
      def scrub({_tag, _attributes, children}), do: children

      def scrub({_tag, children}), do: children

      # Text starting with one of the parser's whitespace placeholders: the
      # placeholder is removed (the space placeholder becomes a real space).
      def scrub(unquote(" " <> replacement_linebreak <> " ") <> text), do: text
      def scrub(unquote(" " <> replacement_space <> " ") <> text), do: " " <> text
      def scrub(unquote(" " <> replacement_tab <> " ") <> text), do: text

      # Text is left alone
      def scrub("" <> text), do: text
    end
  end

  # Builds a `scrub/1` clause keeping `tag_name` (with scrubbed attributes)
  # and the private `scrub_attributes/2` helper it relies on.
  defp allow_this_tag_and_scrub_its_attributes(tag_name) do
    quote do
      def scrub({unquote(tag_name), attributes, children}) do
        {unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
      end

      # Attributes for which `scrub_attribute/2` returns nil are dropped.
      defp scrub_attributes(unquote(tag_name), attributes) do
        attributes
        |> Enum.map(&scrub_attribute(unquote(tag_name), &1))
        |> Enum.reject(&is_nil/1)
      end
    end
  end

  # Builds a `scrub_attribute/2` clause that keeps `attr_name` on `tag_name`
  # with its value unchanged.
  defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
    quote do
      def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
        {unquote(attr_name), value}
      end
    end
  end

  # Builds a `scrub_attribute/2` clause that keeps the "style" attribute on
  # `tag_name` after running its value through `scrub_css/1` (resolved in the
  # module that uses the macro).
  defp allow_this_tag_with_style_attribute(tag_name) do
    quote do
      def scrub_attribute(unquote(tag_name), {"style", value}) do
        {"style", scrub_css(value)}
      end
    end
  end

  # Builds the `scrub_attribute/2` clauses that validate a URI-carrying
  # attribute against the list of allowed schemes.
  defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
    quote do
      # Values starting with an entity are rejected outright.
      def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> _value}) do
        nil
      end

      # A literal colon or one of its entity-/percent-encoded spellings.
      # NOTE(review): `&#x70` decodes to "p", not ":" - confirm it is intended.
      @protocol_separator ~r/:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/mi
      @scheme_capture ~r/(.+?)(:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A)/mi

      def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
        valid_scheme? =
          if String.match?(uri, @protocol_separator) do
            case Regex.run(@scheme_capture, uri) do
              # Only the first capture (the scheme) matters. When the
              # separator is matched by a nested group the result has more
              # than three elements, so the tail must not be pinned to one.
              [_, scheme | _] -> scheme in unquote(valid_schemes)
              nil -> false
            end
          else
            # No protocol separator at all: nothing to validate.
            true
          end

        if valid_scheme?, do: {unquote(attr_name), uri}
      end
    end
  end
end