#!/usr/bin/env python3
"""
This test ensures that Wget keeps reserved characters in URLs in non-UTF-8
charsets.

The server offers base.html, which links to "a%2Bb.html".  Wget is run with
"--spider -r" on base.html, and the test expects the linked file to be
requested as "/a%2Bb.html", i.e. with the percent-escape left untouched.
"""
from sys import exit
from os import environ  # to set LC_ALL
from test.http_test import HTTPTest
from misc.wget_file import WgetFile

# This bug only happened with ASCII charset,
# so we need to set LC_ALL="C" in order to reproduce it.
environ["LC_ALL"] = "C"

######### File Definitions #########
# Requests the server is expected to receive, in order.  The last two
# entries are the point of the test: the link must be requested with
# "%2B" still escaped.
RequestList = [
    [
        "HEAD /base.html",
        "GET /base.html",
        "GET /robots.txt",
        "HEAD /a%2Bb.html",
        "GET /a%2Bb.html",
    ]
]

A_File_Name = "base.html"
B_File_Name = "a%2Bb.html"

# base.html contains a single link pointing at the percent-escaped name.
A_File = WgetFile(A_File_Name, "<a href=\"a%2Bb.html\">")
B_File = WgetFile(B_File_Name, "this is file B")

WGET_OPTIONS = " --spider -r"
WGET_URLS = [[A_File_Name]]

Files = [[A_File, B_File]]

ExpectedReturnCode = 0
# No files are expected to be left behind by the "--spider" run.
ExpectedDownloadedFiles = []

######### Pre and Post Test Hooks #########
pre_test = {
    "ServerFiles": Files,
}
test_options = {
    "WgetCommands": WGET_OPTIONS,
    "Urls": WGET_URLS,
}
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
    "FilesCrawled": RequestList,
}

# Run the test and hand its result to the caller as the process exit status.
err = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
).begin()

exit(err)