1 /*
2  * Copyright (c) 2013 Tim Ruehsen
3  * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4  *
5  * This file is part of Wget
6  *
7  * Wget is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * Wget is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
 * along with Wget.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  *
21  * Testing Wget
22  *
23  * Changelog
24  * 27.07.2013  Tim Ruehsen  created
25  *
26  */
27 
28 #include <config.h>
29 
30 #include <stdlib.h> // exit()
31 #include "libtest.h"
32 
// Non-ASCII test characters spelled out as raw bytes so the test does not
// depend on the encoding of this source file.
// The *_l15 variants are single bytes (presumably ISO-8859-15, the charset the
// served HTML pages declare); the *_u8 variants are the UTF-8 byte sequences
// for the same characters.
#define ccedilla_l15 "\xE7"          // c with cedilla, single byte
#define ccedilla_u8 "\xC3\xA7"       // c with cedilla (U+00E7) in UTF-8
#define eurosign_l15 "\xA4"          // euro sign, single byte
#define eurosign_u8 "\xE2\x82\xAC"   // euro sign (U+20AC) in UTF-8
#define eacute_l15 "\xE9"            // e with acute, single byte
#define eacute_u8 "\xC3\xA9"         // e with acute (U+00E9) in UTF-8
39 
main(void)40 int main(void)
41 {
42 	wget_test_url_t urls[]={
43 		{	.name = "/index.html",
44 			.code = "200 Dontcare",
45 			.body =
46 				"<html><head><title>Main Page</title></head><body><p>" \
47 				"Link to page 1 <a href=\"http://localhost:{{port}}/p1_fran" ccedilla_l15 "ais.html\">La seule page en fran&ccedil;ais</a>." \
48 				"Link to page 1 <a href=\"http://localhost:{{port}}/p3_" eurosign_l15 eurosign_l15 eurosign_l15 ".html\">My tailor is rich</a>." \
49 				"</p></body></html>",
50 			.headers = {
51 				"Content-type: text/html; charset=ISO-8859-15",
52 			}
53 		},
54 		{	.name = "/robots.txt",
55 			.code = "200 Dontcare",
56 			.body = "",
57 			.headers = {
58 				"Content-type: text/plain",
59 			}
60 		},
61 		{	.name = "/p1_fran%C3%A7ais.html", // UTF-8 encoded
62 			.code = "200 Dontcare",
63 			.body =
64 				"<html><head><title>La seule page en fran" ccedilla_l15 "ais</title>" \
65 				"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body>" \
66 				"<p>Link to page 2 <a href=\"http://localhost:{{port}}/p2_" eacute_l15 eacute_l15 "n.html\">Die enkele nerderlangstalige pagina</a>." \
67 				"</p></body></html>",
68 			.headers = {
69 				"Content-type: text/html; charset=ISO-8859-15", // overrides META tag in document
70 			}
71 		},
72 		{	.name = "/p2_%C3%A9%C3%A9n.html", // UTF-8 encoded
73 			.code = "200 Dontcare",
74 			.body =
75 				"<html><head><title>Die enkele nederlandstalige pagina</title>" \
76 				"</head><body><p>&Eacute;&eacute;n is niet veel maar toch meer dan nul.<br/>" \
77 				"Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)<br/>" \
78 				"</p></body></html>",
79 			.headers = {
80 				"Content-type: text/html; charset=UTF-8",
81 			},
82 		},
83 		{	.name = "/p2_%E9%E9n.html",
84 			.code = "200 Dontcare",
85 			.body =
86 				"<html><head><title>Die enkele nederlandstalige pagina</title>" \
87 				"</head><body><p>&Eacute;&eacute;n is niet veel maar toch meer dan nul.<br/>" \
88 				"Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)<br/>" \
89 				"</p></body></html>",
90 			.headers = {
91 				"Content-type: text/html; charset=ISO-8859-1",
92 			},
93 		},
94 		{	.name = "/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html", // UTF-8 encoded
95 			.code = "200 Dontcare",
96 			.body =
97 				"<html><head><title>Euro page</title>" \
98 				"</head><body><p>My tailor isn't rich anymore.</p></body></html>",
99 			.headers = {
100 				"Content-type: text/plain",
101 			},
102 		},
103 	};
104 
105 	// functions won't come back if an error occurs
106 	wget_test_start_server(
107 		WGET_TEST_RESPONSE_URLS, &urls, countof(urls),
108 		WGET_TEST_FEATURE_MHD,
109 		0);
110 
111 	// test-iri-disabled
112 	wget_test(
113 		// WGET_TEST_KEEP_TMPFILES, 1,
114 		WGET_TEST_OPTIONS, "-e robots=on --trust-server-names --local-encoding=UTF-8 --remote-encoding=iso-8859-1 -nH -r",
115 		WGET_TEST_REQUEST_URL, "index.html",
116 		WGET_TEST_EXPECTED_ERROR_CODE, 0,
117 		WGET_TEST_EXPECTED_FILES, &(wget_test_file_t []) {
118 			{ urls[0].name + 1, urls[0].body },
119 			{ urls[1].name + 1, urls[1].body },
120 			{ "p1_fran" ccedilla_u8 "ais.html", urls[2].body },
121 			{ "p2_" eacute_u8 eacute_u8 "n.html", urls[3].body },
122 			{ "p3_" eurosign_u8 eurosign_u8 eurosign_u8 ".html", urls[5].body },
123 			{	NULL } },
124 		0);
125 
126 	exit(EXIT_SUCCESS);
127 }
128