1// Copyright 2017 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5package webpagereplay 6 7import ( 8 "bytes" 9 "compress/flate" 10 "compress/gzip" 11 "crypto/sha256" 12 "encoding/base64" 13 "encoding/json" 14 "errors" 15 "fmt" 16 "io" 17 "io/ioutil" 18 "log" 19 "net/http" 20 "net/url" 21 "regexp" 22 "strconv" 23 "strings" 24) 25 26type readerWithError struct { 27 r io.Reader 28 err error 29} 30 31func (r *readerWithError) Read(p []byte) (int, error) { 32 n, err := r.r.Read(p) 33 if err == io.EOF && r.err != nil { 34 err = r.err 35 } 36 return n, err 37} 38 39// cloneHeaders clones h. 40func cloneHeaders(h http.Header) http.Header { 41 hh := make(http.Header, len(h)) 42 for k, vv := range h { 43 if vv == nil { 44 hh[k] = nil 45 } else { 46 hh[k] = append([]string{}, vv...) 47 } 48 } 49 return hh 50} 51 52// transformResponseBody applies a transformation function to the response 53// body. 54// tf is passed an uncompressed body and should return an uncompressed body. 55// The final response will be compressed if allowed by 56// resp.Header[ContentEncoding]. 57func transformResponseBody(resp *http.Response, f func([]byte) []byte) error { 58 failEarly := func(body []byte, err error) error { 59 resp.Body = ioutil.NopCloser(&readerWithError{bytes.NewReader(body), err}) 60 return err 61 } 62 63 body, err := ioutil.ReadAll(resp.Body) 64 if err != nil { 65 return failEarly(body, err) 66 } 67 resp.Body.Close() 68 69 var isCompressed bool 70 var ce string 71 if encodings, ok := resp.Header["Content-Encoding"]; ok && len(encodings) > 0 { 72 // TODO(xunjieli): Use the last CE for now. Support chained CEs. 73 ce = strings.ToLower(encodings[len(encodings)-1]) 74 isCompressed = (ce != "" && ce != "identity") 75 } 76 77 // Decompress as needed. 78 if isCompressed { 79 body, err = decompressBody(ce, body) 80 if err != nil { 81 return failEarly(body, err) 82 } 83 } 84 85 // Transform and recompress as needed. 86 body = f(body) 87 if isCompressed { 88 body, _, err = CompressBody(ce, body) 89 if err != nil { 90 return failEarly(body, err) 91 } 92 } 93 resp.Body = ioutil.NopCloser(bytes.NewReader(body)) 94 95 // ContentLength has changed, so update the outgoing headers accordingly. 96 if resp.ContentLength >= 0 { 97 resp.ContentLength = int64(len(body)) 98 resp.Header.Set("Content-Length", strconv.Itoa(len(body))) 99 } 100 return nil 101} 102 103// Decompresses Response Body in place. 104func DecompressResponse(resp *http.Response) error { 105 ce := strings.ToLower(resp.Header.Get("Content-Encoding")) 106 isCompressed := (ce != "" && ce != "identity") 107 if isCompressed { 108 body, err := ioutil.ReadAll(resp.Body) 109 if err != nil { 110 return err 111 } 112 resp.Body.Close() 113 body, err = decompressBody(ce, body) 114 if err != nil { 115 return err 116 } 117 resp.Body = ioutil.NopCloser(bytes.NewReader(body)) 118 } 119 return nil 120} 121 122// decompressBody reads a response body and decompresses according to the 123// given Content-Encoding. 124func decompressBody(ce string, compressed []byte) ([]byte, error) { 125 var r io.ReadCloser 126 switch strings.ToLower(ce) { 127 case "gzip": 128 var err error 129 r, err = gzip.NewReader(bytes.NewReader(compressed)) 130 if err != nil { 131 return nil, err 132 } 133 case "deflate": 134 r = flate.NewReader(bytes.NewReader(compressed)) 135 // TODO(catapult:3742): Implement Brotli support. 136 default: 137 // Unknown compression type or uncompressed. 138 return compressed, errors.New("unknown compression: " + ce) 139 } 140 defer r.Close() 141 return ioutil.ReadAll(r) 142} 143 144// CompressBody reads a response body and compresses according to the given 145// Accept-Encoding. 146// The chosen compressed encoding is returned along with the compressed body. 147func CompressBody(ae string, uncompressed []byte) ([]byte, string, error) { 148 var buf bytes.Buffer 149 var w io.WriteCloser 150 outCE := "" 151 ae = strings.ToLower(ae) 152 switch { 153 case strings.Contains(ae, "gzip"): 154 w = gzip.NewWriter(&buf) 155 outCE = "gzip" 156 case strings.Contains(ae, "deflate"): 157 w, _ = flate.NewWriter(&buf, flate.DefaultCompression) // never fails 158 outCE = "deflate" 159 default: 160 // Unknown compression type or compression not allowed. 161 return uncompressed, "identity", errors.New("unknown compression: " + ae) 162 } 163 if _, err := io.Copy(w, bytes.NewReader(uncompressed)); err != nil { 164 return buf.Bytes(), outCE, err 165 } 166 err := w.Close() 167 return buf.Bytes(), outCE, err 168} 169 170// getCSPScriptSrcDirectiveFromHeaders returns a Content-Security-Policy (CSP) 171// header's script source directive. If a header set does not have a CSP 172// header or if the CSP header does not have a script-src directive, 173// getCSPScriptSrcDirectiveFromHeaders returns an empty string. 174func getCSPScriptSrcDirectiveFromHeaders(header http.Header) string { 175 csp := header.Get("Content-Security-Policy") 176 if csp == "" { 177 return "" 178 } 179 180 directives := strings.Split(csp, ";") 181 default_directive := "" 182 for _, directive := range directives { 183 directive = strings.TrimSpace(directive) 184 if strings.HasPrefix(directive, "script-src") { 185 return directive 186 } 187 if strings.HasPrefix(directive, "default-src") { 188 default_directive = directive 189 } 190 } 191 192 return default_directive 193} 194 195// getScriptSrcNonceTokenFromCSPHeader returns the nonce token from a 196// Content-Security-Policy (CSP) header's script source directive, or an empty 197// string if the CSP header's script source 198// does not contain a nonce. 199// For more background information on CSP and nonce, please refer to 200// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ 201// Content-Security-Policy/script-src 202// https://developers.google.com/web/fundamentals/security/csp/ 203func getNonceTokenFromCSPHeaderScriptSrc(cspScriptSrc string) string { 204 cspScriptSrc = strings.Trim(cspScriptSrc, " ") 205 tokens := strings.Split(cspScriptSrc, " ") 206 for _, token := range tokens { 207 token = strings.TrimSpace(token) 208 if strings.HasPrefix(token, "'nonce-") { 209 token = strings.TrimPrefix(token, "'nonce-") 210 token = strings.TrimSuffix(token, "'") 211 return token 212 } 213 } 214 215 return "" 216} 217 218// transformCSPHeader transforms a Content-Security-Policy (CSP) header to 219// permit execution of inline scripts. Without this permission a page with a 220// restrictive CSP will not execute WPR 221// injected scripts. 222// For more background information on CSP, please refer to 223// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ 224// Content-Security-Policy/script-src 225// https://developers.google.com/web/fundamentals/security/csp/ 226func transformCSPHeader(header http.Header, injectedScriptSha256 string) { 227 csp := header.Get("Content-Security-Policy") 228 if csp == "" { 229 return 230 } 231 // We prefer the 'script-src', but if it doesn't exist, we want to update a 232 // 'default-src' directive if it exists. 233 directives := strings.Split(csp, ";") 234 updateIndex := -1 235 for index, directive := range directives { 236 directive = strings.TrimSpace(directive) 237 if strings.HasPrefix(directive, "script-src") || 238 strings.HasPrefix(directive, "default-src") { 239 updateIndex = index 240 if strings.HasPrefix(directive, "script-src") { 241 break 242 } 243 } 244 } 245 // No CSP policy to worry about updating. 246 if updateIndex < 0 { 247 return 248 } 249 updateDirective := directives[updateIndex] 250 if getNonceTokenFromCSPHeaderScriptSrc(updateDirective) != "" { 251 // If the CSP header's script-src contains a nonce, then 252 // transformCSPHeader does nothing. 253 // WPR will add the nonce token to any injected script to open the 254 // permission. 255 return 256 } 257 // Break the 'script-src' or 'default-src' directive into more tokens, 258 // and examine each token. 259 tokens := strings.Split(updateDirective, " ") 260 newDirective := "" 261 needsUnsafeInline := true 262 263 for _, token := range tokens { 264 token = strings.TrimSpace(token) 265 // All keyword tokens ['unsafe-inline', 'none', 'nonce-...', 'sha...''] 266 // are single-quote wrapped in the CSP headers. 267 if token == "'unsafe-inline'" { 268 needsUnsafeInline = false 269 } 270 // If the CSP header contains a hash, append the hash of the injected 271 // script. 272 // If a CSP specifies a hash, only inline scripts matching the hash 273 // may execute. 274 if strings.HasPrefix(token, "'sha256-") || 275 strings.HasPrefix(token, "'sha384-") || 276 strings.HasPrefix(token, "'sha512-") { 277 newDirective += "'sha256-" + injectedScriptSha256 + "' " 278 needsUnsafeInline = false 279 } 280 // Don't add back 'none' to our set, as if it is the only item it 281 // follows we will be adding 'unsafe-inline' below. 282 if token == "'none'" { 283 continue 284 } 285 newDirective += token + " " 286 } 287 288 if needsUnsafeInline { 289 newDirective += "'unsafe-inline'" 290 } 291 292 directives[updateIndex] = newDirective 293 newCsp := strings.Join(directives, ";") 294 header.Set("Content-Security-Policy", newCsp) 295} 296 297// ResponseTransformer is an interface for transforming HTTP responses. 298type ResponseTransformer interface { 299 // Transform applies transformations to the response. for example, by 300 // updating resp.Header or wrapping resp.Body. The transformer may inspect 301 // the request but should not modify the request. 302 Transform(req *http.Request, resp *http.Response) 303} 304 305// NewScriptInjector constructs a transformer that injects the given script 306// after the first <head>, <html>, or <!doctype html> tag. Statements in 307// script must be ';' terminated. The script is lightly minified before 308// injection. 309func NewScriptInjector( 310 script []byte, replacements map[string]string) ResponseTransformer { 311 // Remove C-style comments. 312 script = jsMultilineCommentRE.ReplaceAllLiteral(script, []byte("")) 313 script = jsSinglelineCommentRE.ReplaceAllLiteral(script, []byte("")) 314 for oldstr, newstr := range replacements { 315 script = bytes.Replace(script, []byte(oldstr), []byte(newstr), -1) 316 } 317 // Remove line breaks. 318 script = bytes.Replace(script, []byte("\r\n"), []byte(""), -1) 319 // Compute the sha256 hash of the script content. 320 // WPR may need to use the sha256 hash in a CSP header to grant the injected 321 // script execute permission. 322 sha256Bytes := sha256.Sum256(script) 323 sha256String := base64.URLEncoding.EncodeToString(sha256Bytes[:]) 324 return &scriptInjector{script, sha256String} 325} 326 327// NewScriptInjectorFromFile creates a script injector from a script stored in 328// a file. 329func NewScriptInjectorFromFile( 330 filename string, replacements map[string]string) ( 331 ResponseTransformer, error) { 332 script, err := ioutil.ReadFile(filename) 333 if err != nil { 334 return nil, err 335 } 336 return NewScriptInjector(script, replacements), nil 337} 338 339var ( 340 jsMultilineCommentRE = regexp.MustCompile(`(?is)/\*.*?\*/`) 341 jsSinglelineCommentRE = regexp.MustCompile(`(?i)//.*`) 342 doctypeRE = regexp.MustCompile( 343 `(?is)^.*?(<!--.*-->)?.*?<!doctype html>`) 344 htmlRE = regexp.MustCompile( 345 `(?is)^.*?(<!--.*-->)?.*?<html.*?>`) 346 headRE = regexp.MustCompile( 347 `(?is)^.*?(<!--.*-->)?.*?<head.*?>`) 348) 349 350type scriptInjector struct { 351 script []byte 352 sha256 string 353} 354 355// Given a nonce, getScriptWithNonce returns the injected script text with the 356// nonce. 357// If nonce is an empty string, getScriptWithNonce returns the script block 358// without attaching a nonce attribute. 359// Some responses may specify a nonce inside their Content-Security-Policy, 360// script-src directive. 361// The script injector needs to set the injected script's nonce attribute to 362// open execute permission for the injected script. 363func (si *scriptInjector) getScriptWithNonce(nonce string) []byte { 364 var buffer bytes.Buffer 365 buffer.Write([]byte("<script")) 366 if nonce != "" { 367 buffer.Write([]byte(" nonce=\""+nonce+"\"")) 368 } 369 buffer.Write([]byte(">")) 370 buffer.Write(si.script) 371 buffer.Write([]byte("</script>")) 372 return buffer.Bytes() 373} 374 375func (si *scriptInjector) Transform(_ *http.Request, resp *http.Response) { 376 // Skip non-HTML non-200 responses. 377 if !strings.HasPrefix( 378 strings.ToLower(resp.Header.Get("Content-Type")), "text/html") { 379 return 380 } 381 if resp.StatusCode != http.StatusOK { 382 return 383 } 384 385 transformResponseBody(resp, func(body []byte) []byte { 386 // Don't inject if the script has already been injected. 387 if bytes.Contains(body, si.script) { 388 return body 389 } 390 391 // Find an appropriate place to inject the script, then inject. 392 idx := headRE.FindIndex(body) 393 if idx == nil { 394 idx = htmlRE.FindIndex(body) 395 } 396 if idx == nil { 397 idx = doctypeRE.FindIndex(body) 398 } 399 if idx == nil { 400 log.Printf( 401 "ScriptInjector(%s): no start tags found, skip injecting script", 402 resp.Request.URL) 403 return body 404 } 405 n := idx[1] 406 407 // If the response has a content-script-policy script src directive that 408 // specifies a nonce, add the nonce to the injected script. 409 // If a CSP specifies a nonce, only script blocks containing a matching 410 // nonce attribute may execute. 411 // To open permission for WPR-injected scripts while preserving permission 412 // for any page-src scripts containing the nonce, WPR must add the nonce 413 // token to injected scripts. Please see http://crbug.com/904534 for a 414 // detailed case study. 415 nonce := "" 416 if directive := getCSPScriptSrcDirectiveFromHeaders(resp.Header); 417 directive != "" { 418 nonce = getNonceTokenFromCSPHeaderScriptSrc(directive) 419 } 420 421 var buffer bytes.Buffer 422 buffer.Write(body[:n]) 423 buffer.Write(si.getScriptWithNonce(nonce)) 424 buffer.Write(body[n:]) 425 426 // Having injected script, transform the response's 427 // content-security-policy directive to allow the injected script to 428 // execute. 429 transformCSPHeader(resp.Header, si.sha256) 430 return buffer.Bytes() 431 }) 432} 433 434// NewRuleBasedTransformer creates a transformer that is controlled by a rules 435// file. 436// Rules are specified as a JSON-encoded array of TransformerRule objects. 437func NewRuleBasedTransformer(filename string) (ResponseTransformer, error) { 438 raw, err := ioutil.ReadFile(filename) 439 if err != nil { 440 return nil, err 441 } 442 var rules []*TransformerRule 443 if err := json.Unmarshal(raw, &rules); err != nil { 444 return nil, fmt.Errorf("json unmarshal failed: %v", err) 445 } 446 for _, r := range rules { 447 if err := r.compile(); err != nil { 448 return nil, err 449 } 450 } 451 return &ruleBasedTransformer{rules}, nil 452} 453 454// TransformerRule is a single JSON-encoded rule. Each rule matches either a 455// specific URL (via URL) or a regexp pattern (via URLPattern). 456type TransformerRule struct { 457 // How to match URLs: exactly one of URL and URLPattern must be specified. 458 URL string 459 URLPattern string 460 461 // Rules to apply to these URLs. 462 // Inject these extra headers into the response 463 ExtraHeaders http.Header 464 // Inject these HTTP/2 PUSH_PROMISE frames into the response 465 Push []PushPromiseRule 466 467 // Hidden state generated by compile. 468 urlRE *regexp.Regexp 469} 470 471// PushPromiseRule is a rule that adds pushes into the response stream. 472type PushPromiseRule struct { 473 // URL to push. 474 URL string 475 476 // Header for the request being simulated by this push. If empty, a default 477 // set of headers are created by cloning the current request's headers and 478 // setting 479 // "referer" to the URL of the current (pusher) request. 480 Headers http.Header 481} 482 483type ruleBasedTransformer struct { 484 rules []*TransformerRule 485} 486 487func (r *TransformerRule) compile() error { 488 raw, _ := json.Marshal(r) 489 if r.URL == "" && r.URLPattern == "" { 490 return fmt.Errorf("rule missing URL or URLPattern: %q", raw) 491 } 492 if r.URL != "" && r.URLPattern != "" { 493 return fmt.Errorf("rule has both URL and URLPattern: %q", raw) 494 } 495 if r.URLPattern != "" { 496 re, err := regexp.Compile(r.URLPattern) 497 if err != nil { 498 return fmt.Errorf("error compiling URLPattern %s: %v", r.URLPattern, err) 499 } 500 r.urlRE = re 501 } 502 if len(r.ExtraHeaders) == 0 && len(r.Push) == 0 { 503 return fmt.Errorf("rule has no affect: %q", raw) 504 } 505 for _, p := range r.Push { 506 if p.URL == "" { 507 return fmt.Errorf("push has empty URL: %q", raw) 508 } 509 if u, err := url.Parse(p.URL); err != nil || !u.IsAbs() || 510 (u.Scheme != "http" && u.Scheme != "https") { 511 return fmt.Errorf("push has bad URL %s: %v", p.URL, err) 512 } 513 } 514 return nil 515} 516 517func (r *TransformerRule) matches(req *http.Request) bool { 518 if r.URL != "" { 519 return r.URL == req.URL.String() 520 } 521 return r.urlRE.MatchString(req.URL.String()) 522} 523 524func (r *TransformerRule) shortString() string { 525 pushes := "" 526 for _, p := range r.Push { 527 pushes += p.URL + " " 528 } 529 return fmt.Sprintf("ExtraHeaders: %d; Push: [%s]", len(r.ExtraHeaders), 530 pushes) 531} 532 533func (rt *ruleBasedTransformer) Transform( 534 req *http.Request, resp *http.Response) { 535 for _, r := range rt.rules { 536 if !r.matches(req) { 537 continue 538 } 539 log.Printf("Rule(%s): matched rule %v", req.URL, r.shortString()) 540 for k, v := range r.ExtraHeaders { 541 resp.Header[k] = append(resp.Header[k], v...) 542 } 543 /* 544 if disabled { 545 for _, p := range r.Push { 546 h := p.Headers 547 if len(h) == 0 { 548 h = cloneHeaders(req.Header) 549 h.Set("Referer", req.URL.String()) 550 } 551 rw.Push(p.URL, h) 552 } 553 } 554 */ 555 } 556} 557