1 #pragma once
2 
3 
4 #include <xmpp/xmpp_stanza.hpp>
5 
6 #include <functional>
7 
8 #include <expat.h>
9 
10 /**
11  * A SAX XML parser that builds XML nodes and spawns events when a complete
12  * stanza is received (an element of level 2), or when the document is
13  * opened/closed (an element of level 1)
14  *
15  * After a stanza_event has been spawned, we delete the whole stanza. This
16  * means that even with a very long document (in XMPP the document is
17  * potentially infinite), the memory is never exhausted as long as each
 * stanza is reasonably short.
19  *
20  * The element names generated by expat contain the namespace of the
21  * element, a colon (':') and then the actual name of the element.  To get
22  * an element "x" with a namespace of "http://jabber.org/protocol/muc", you
23  * just look for an XmlNode named "http://jabber.org/protocol/muc:x"
24  *
 * TODO: enforce a size limit for the stanza (limit the number of children
 * it can contain). For example, forbid the parser from going deeper than
 * level 20 (arbitrary number here), and forbid each XML node from having
 * more than 15 children (arbitrary number again).
29  */
30 class XmppParser
31 {
32 public:
33   explicit XmppParser();
34   ~XmppParser();
35   XmppParser(const XmppParser&) = delete;
36   XmppParser& operator=(const XmppParser&) = delete;
37   XmppParser(XmppParser&&) = delete;
38   XmppParser& operator=(XmppParser&&) = delete;
39 
40 public:
41   /**
42    * Feed the parser with some XML data
43    */
44   int feed(const char* data, const int len, const bool is_final);
45   /**
46    * Parse the data placed in the parser buffer
47    */
48   int parse(const int size, const bool is_final);
49   /**
50    * Reset the parser, so it can be used from scratch afterward
51    */
52   void reset();
53   /**
54    * Get a buffer provided by the xml parser.
55    */
56   void* get_buffer(const size_t size) const;
57   /**
58    * Add one callback for the various events that this parser can spawn.
59    */
60   void add_stanza_callback(std::function<void(const Stanza&)>&& callback);
61   void add_stream_open_callback(std::function<void(const XmlNode&)>&& callback);
62   void add_stream_close_callback(std::function<void(const XmlNode&)>&& callback);
63 
64   /**
65    * Called when a new XML element has been opened. We instanciate a new
66    * XmlNode and set it as our current node. The parent of this new node is
67    * the previous "current" node. We have all the element's attributes in
68    * this event.
69    *
70    * We spawn a stream_event with this node if this is a level-1 element.
71    */
72   void start_element(const XML_Char* name, const XML_Char** attribute);
73   /**
74    * Called when an XML element has been closed. We close the current_node,
75    * set our current_node as the parent of the current_node, and if that was
76    * a level-2 element we spawn a stanza_event with this node.
77    *
78    * And we then delete the stanza (and everything under it, its children,
79    * attribute, etc).
80    */
81   void end_element(const XML_Char* name);
82   /**
83    * Some inner or tail data has been parsed
84    */
85   void char_data(const XML_Char* data, const size_t len);
86   /**
87    * Calls all the stanza_callbacks one by one.
88    */
89   void stanza_event(const Stanza& stanza) const;
90   /**
91    * Calls all the stream_open_callbacks one by one. Note: the passed node is not
92    * closed yet.
93    */
94   void stream_open_event(const XmlNode& node) const;
95   /**
96    * Calls all the stream_close_callbacks one by one.
97    */
98   void stream_close_event(const XmlNode& node) const;
99 
100 private:
101   /**
102    * Init the XML parser and install the callbacks
103    */
104   void init_xml_parser();
105 
106   /**
107    * Expat structure.
108    */
109   XML_Parser parser{};
110   /**
111    * The current depth in the XML document
112    */
113   size_t level;
114   /**
115    * The deepest XML node opened but not yet closed (to which we are adding
116    * new children, inner or tail)
117    */
118   XmlNode* current_node;
119   /**
120    * The root node has no parent, so we keep it here: the XmppParser object
121    * is its owner.
122    */
123   std::unique_ptr<XmlNode> root;
124   /**
125    * A list of callbacks to be called on an *_event, receiving the
126    * concerned Stanza/XmlNode.
127    */
128   std::vector<std::function<void(const Stanza&)>> stanza_callbacks;
129   std::vector<std::function<void(const XmlNode&)>> stream_open_callbacks;
130   std::vector<std::function<void(const XmlNode&)>> stream_close_callbacks;
131 };
132 
133 
134