1 /*
2  * nghttp2 - HTTP/2 C Library
3  *
4  * Copyright (c) 2012 Tatsuhiro Tsujikawa
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 #ifndef HTML_PARSER_H
26 #define HTML_PARSER_H
27 
28 #include "nghttp2_config.h"
29 
30 #include <vector>
31 #include <string>
32 
33 #ifdef HAVE_LIBXML2
34 
35 #  include <libxml/HTMLparser.h>
36 
37 #endif // HAVE_LIBXML2
38 
39 namespace nghttp2 {
40 
// Kind tag stored next to each link string in the `links` containers
// declared in this header.  The numeric values are written out
// explicitly; they are the same values the enumerators receive when
// only the first one is pinned to 1.
enum ResourceType {
  REQ_CSS = 1,
  REQ_JS = 2,
  REQ_UNBLOCK_JS = 3,
  REQ_IMG = 4,
  REQ_OTHERS = 5,
};
48 
49 struct ParserData {
50   std::string base_uri;
51   std::vector<std::pair<std::string, ResourceType>> links;
52   // > 0 if we are inside "head" element.
53   int inside_head;
54   ParserData(const std::string &base_uri);
55 };
56 
57 #ifdef HAVE_LIBXML2
58 
59 class HtmlParser {
60 public:
61   HtmlParser(const std::string &base_uri);
62   ~HtmlParser();
63   int parse_chunk(const char *chunk, size_t size, int fin);
64   const std::vector<std::pair<std::string, ResourceType>> &get_links() const;
65   void clear_links();
66 
67 private:
68   int parse_chunk_internal(const char *chunk, size_t size, int fin);
69 
70   std::string base_uri_;
71   htmlParserCtxtPtr parser_ctx_;
72   ParserData parser_data_;
73 };
74 
75 #else // !HAVE_LIBXML2
76 
77 class HtmlParser {
78 public:
HtmlParser(const std::string & base_uri)79   HtmlParser(const std::string &base_uri) {}
parse_chunk(const char * chunk,size_t size,int fin)80   int parse_chunk(const char *chunk, size_t size, int fin) { return 0; }
get_links()81   const std::vector<std::pair<std::string, ResourceType>> &get_links() const {
82     return links_;
83   }
clear_links()84   void clear_links() {}
85 
86 private:
87   std::vector<std::pair<std::string, ResourceType>> links_;
88 };
89 
90 #endif // !HAVE_LIBXML2
91 
92 } // namespace nghttp2
93 
94 #endif // HTML_PARSER_H
95