1 /*
2 * Copyright (c) 2013 Tim Ruehsen
3 * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4 *
5 * This file is part of libwget.
6 *
7 * Libwget is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * Libwget is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with libwget. If not, see <https://www.gnu.org/licenses/>.
19 *
20 *
21 * Advanced example for CSS parsing using libwget
22 *
23 * Changelog
24 * 15.01.2013 Tim Ruehsen created
25 *
26 * Demonstrate how to extract URIs from CSS files into a vector.
27 *
28 */
29
30 #include <unistd.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <wget.h>
34
35 // use the helper routines provided by libwget
36 #define info_printf wget_info_printf
37 // #define error_printf wget_error_printf
38 #define error_printf_exit wget_error_printf_exit
39
usage(const char * myname)40 static void WGET_GCC_NORETURN usage(const char *myname)
41 {
42 error_printf_exit(
43 "\nUsage: %s [options] file...\n"\
44 " --base <URI> Default base for relative URIs, default: http://www.example.com\n"\
45 " --encoding <Encoding> Default file character encoding, default: iso-8859-1\n"\
46 "\n"\
47 " Examples:\n"\
48 " %s --base http://www.mydomain.com x.css\n"\
49 " cat x.css | %s --base http://www.mydomain.com -\n"\
50 " %s http://www.example.com\n"\
51 "\n"\
52 " Print URIs as found (without a base):\n"\
53 " %s --base \"\" x.css\n\n",
54 myname, myname, myname, myname, myname);
55 }
56
main(int argc,const char ** argv)57 int main(int argc, const char **argv)
58 {
59 // Base URI for converting relative to absolute URIs
60 const char *
61 base = NULL;
62
63 // We assume that base is encoded in the local charset.
64 const char *
65 local_encoding = wget_local_charset_encoding();
66
67 // parsed 'base'
68 wget_iri
69 *base_uri;
70
71 // Character encoding of CSS file content
72 // An HTTP response may contain the encoding in the Content-Type header,
73 // see https://stackoverflow.com/questions/2526033/why-specify-charset-utf-8-in-your-css-file
74 const char *
75 css_encoding = NULL;
76
77 int
78 argpos;
79
80 // We want the libwget error messages be printed to STDERR.
81 // From here on, we can call wget_error_printf, etc.
82 wget_logger_set_stream(wget_get_logger(WGET_LOGGER_ERROR), stderr);
83
84 // We want the libwget info messages be printed to STDOUT.
85 // From here on, we can call wget_info_printf, etc.
86 wget_logger_set_stream(wget_get_logger(WGET_LOGGER_INFO), stdout);
87
88 // parse options
89 for (argpos = 1; argpos < argc; argpos++) {
90 if (!strcmp(argv[argpos], "--base") && argc - argpos > 1) {
91 base = argv[++argpos];
92 info_printf("Local URI encoding = '%s'\n", local_encoding);
93 } else if (!strcmp(argv[argpos], "--encoding") && argc - argpos > 1) {
94 css_encoding = argv[++argpos];
95 } else if (!strcmp(argv[argpos], "--")) {
96 argpos++;
97 break;
98 } else if (argv[argpos][0] == '-') {
99 usage(argv[0]);
100 } else
101 break;
102 }
103
104 // All URIs are converted into UTF-8 charset.
105 // That's why we need the local encoding (aka 'encoding of base URI') here.
106 base_uri = base ? wget_iri_parse(base, local_encoding) : NULL;
107
108 for (;argpos < argc; argpos++) {
109 // use '-' as filename for STDIN
110 wget_vector *css_urls = wget_css_get_urls_from_localfile(argv[argpos], base_uri, &css_encoding);
111
112 if (wget_vector_size(css_urls) > 0) {
113 info_printf("URL encoding for %s is '%s':\n", argv[argpos], css_encoding ? css_encoding : "UTF-8");
114
115 for (int it = 0; it < wget_vector_size(css_urls); it++) {
116 wget_css_parsed_url *css_url = wget_vector_get(css_urls, it);
117 if (css_url->abs_url)
118 info_printf(" %s -> %s\n", css_url->url, css_url->abs_url);
119 else
120 info_printf(" %s\n", css_url->url);
121 }
122
123 info_printf("\n");
124 }
125
126 wget_vector_free(&css_urls);
127 }
128
129 wget_iri_free(&base_uri);
130
131 return 0;
132 }
133