1 // This may look like C code, but it's really -*- C++ -*-
2 /*
3  * Copyright (C) 2011 Emweb bv, Herent, Belgium.
4  *
5  * See the LICENSE file for terms of use.
6  */
7 #ifndef WT_UTILS_H_
8 #define WT_UTILS_H_
9 
10 #include <Wt/WGlobal.h>
11 #include <string>
12 #include <vector>
13 
14 /*! \file Utils
15  */
16 
17 namespace Wt {
18 
19   /*! \brief Namespace with utility functions.
20    *
21    * This namespace contains functions for computing message digests
22    * with cryptographic hash functions (md5, sha1), and functions that
23    * implement encoding and decoding for common encodings.
24    *
25    * These utility functions can be accessed by including the
26    * \c Wt/Utils.h header.
27    */
28   namespace Utils {
29 
/*! \brief An enumeration for HTML encoding flags.
 *
 * Values of this enumeration are passed (as a WFlags<HtmlEncodingFlag>)
 * to the htmlEncode() overloads to adjust how text is encoded.
 *
 * \sa htmlEncode()
 */
enum class HtmlEncodingFlag
{
  //! \brief Encode new-lines as line breaks (&lt;br&gt;)
  EncodeNewLines = 0x1
};
37 
/*! \brief Computes an MD5 hash.
 *
 * This utility function computes an MD5 hash, and returns the raw
 * (binary) hash value.
 *
 * \param data the data to be hashed
 * \return the raw (binary) hash value; it is not hex- or Base64-encoded,
 *         use hexEncode() to obtain a printable representation
 *
 * \sa sha1(), hexEncode()
 */
WT_API extern std::string md5(const std::string& data);
46 
/*! \brief Computes a SHA-1 hash.
 *
 * This utility function computes a SHA-1 hash, and returns the raw
 * (binary) hash value.
 *
 * \param data the data to be hashed
 * \return the raw (binary) hash value; use hexEncode() to obtain a
 *         printable representation
 *
 * \note When \c WT_TARGET_JAVA is defined, this function is declared
 *       to return the hash as a \c std::vector<unsigned char> instead
 *       of a \c std::string.
 *
 * \sa md5(), hexEncode()
 */
#ifndef WT_TARGET_JAVA
WT_API extern std::string sha1(const std::string& data);
#else
WT_API extern std::vector<unsigned char> sha1(const std::string& data);
#endif
59 
60 
/*! \brief Performs Base64-encoding of data.
 *
 * This utility function implements a Base64 encoding (RFC 2045) of
 * the \p data.
 *
 * When the \p crlf argument is \c true (the default), a CRLF sequence
 * will be added after each sequence of 76 characters.
 *
 * \param data the data to be encoded
 * \param crlf whether the output is broken into lines using CRLF
 * \return the Base64-encoded representation of \p data
 *
 * \sa base64Decode()
 */
WT_API extern std::string base64Encode(const std::string& data,
				       bool crlf = true);
73 
/*! \brief Performs Base64-decoding of data.
 *
 * This utility function implements a Base64 decoding (RFC 2045) of
 * the \p data. Illegal characters are discarded and skipped.
 *
 * \param data the Base64-encoded data
 * \return the decoded data
 *
 * \note When \c WT_TARGET_JAVA is defined, base64Decode() is declared
 *       to return the decoded bytes as a \c std::vector<unsigned char>,
 *       and an additional function base64DecodeS() is declared which
 *       returns a \c std::string.
 *
 * \sa base64Encode()
 */
#ifndef WT_TARGET_JAVA
WT_API extern std::string base64Decode(const std::string& data);
#else
WT_API extern std::vector<unsigned char> base64Decode(const std::string& data);
WT_API extern std::string base64DecodeS(const std::string& data);
#endif
87 
/*! \brief Performs Hex-encoding of data.
 *
 * A hex-encoding outputs the value of every byte as a two-digit
 * hexadecimal number.
 *
 * \param data the data to be encoded
 * \return the hex-encoded representation of \p data (two hexadecimal
 *         digits per input byte)
 *
 * \sa hexDecode()
 */
WT_API extern std::string hexEncode(const std::string& data);
96 
/*! \brief Performs Hex-decoding of data.
 *
 * Decodes hex-encoded \p data (such as produced by hexEncode()).
 * Illegal characters are discarded and skipped.
 *
 * \param data the hex-encoded data
 * \return the decoded data
 *
 * \sa hexEncode()
 */
WT_API extern std::string hexDecode(const std::string& data);
104 
/*! \brief Performs HTML encoding of text.
 *
 * This utility function escapes characters so that the \p text can
 * be embedded verbatim in an HTML text block.
 *
 * \param text the text to be encoded
 * \param flags encoding options (see HtmlEncodingFlag); no flag is
 *        set by default
 * \return the HTML-encoded text
 *
 * \sa HtmlEncodingFlag
 */
WT_API extern std::string htmlEncode(const std::string& text,
				     WFlags<HtmlEncodingFlag> flags = None);
112 
/*! \brief Performs HTML encoding of text.
 *
 * This utility function escapes characters so that the \p text can
 * be embedded verbatim in an HTML text block.
 *
 * By default, newlines are ignored. By passing the HtmlEncodingFlag::EncodeNewLines
 * flag, these may be encoded as line breaks (&lt;br&gt;).
 *
 * \param text the text to be encoded
 * \param flags encoding options (see HtmlEncodingFlag); no flag is
 *        set by default
 * \return the HTML-encoded text
 *
 * \sa HtmlEncodingFlag
 */
WT_API extern WString htmlEncode(const WString& text,
				 WFlags<HtmlEncodingFlag> flags = None);
123 
/*! \brief Performs Url encoding (aka percentage encoding).
 *
 * This utility function percent encodes a \p text so that it can be
 * embedded verbatim in a URL (e.g. as a fragment).
 *
 * \param text the text to be encoded
 * \return the percent-encoded text
 *
 * \note To url encode a unicode string, the de-facto standard
 * practice is to encode a UTF-8 encoded string.
 *
 * \sa WString::toUTF8(), urlDecode()
 */
WT_API extern std::string urlEncode(const std::string& text);
135 
/*! \brief Performs Url decoding.
 *
 * This utility function decodes a percent-encoded \p text (such as
 * text produced by urlEncode(), e.g. taken from a URL fragment).
 *
 * \param text the percent-encoded text
 * \return the decoded text
 *
 * \note To url decode a unicode string, the de-facto standard
 * practice is to interpret the string as a UTF-8 encoded string.
 *
 * \sa WString::fromUTF8(), urlEncode()
 */
WT_API extern std::string urlDecode(const std::string& text);
147 
/*! \brief Remove tags/attributes from text that are not passive.
 *
 * This removes tags and attributes from XHTML-formatted text that do
 * not simply display something but may trigger scripting, and could
 * have been injected by a malicious user for Cross-Site Scripting
 * (XSS).
 *
 * This method is used by the library to sanitize XHTML-formatted text
 * set in WText, but it may also be useful outside the library to
 * sanitize user content when directly using JavaScript.
 *
 * Modifies the \p text if needed. When the text is not proper XML,
 * returns \c false.
 *
 * \param text the XHTML-formatted text to sanitize; it is an in/out
 *        argument and is modified in place if needed
 * \return \c false when \p text is not proper XML
 */
WT_API extern bool removeScript(WString& text);
163 
/*! \brief Guess the image mime type from an image.
 *
 * This function examines the header of an image and tries to identify
 * the image type.
 *
 * At the moment, it recognizes and returns as mime type :
 * - image/png
 * - image/jpeg
 * - image/gif
 * - image/bmp
 *
 * The header should contain (at least) the 25 first bytes of the image data.
 *
 * If no mime-type could be derived, an empty string is returned.
 *
 * \param header the first bytes (at least 25) of the image data
 * \return the guessed mime type, or an empty string if none could be
 *         derived
 *
 * \sa guessImageMimeType()
 */
WT_API extern std::string
guessImageMimeTypeData(const std::vector<unsigned char>& header);
183 
/*! \brief Guess the image mime type from an image.
 *
 * This function opens the image \p file, reads the first 25 bytes and calls
 * guessImageMimeTypeData() to infer the mime type.
 *
 * \param file the name (path) of the image file to examine
 * \return the mime type as inferred by guessImageMimeTypeData()
 *
 * \sa guessImageMimeTypeData()
 */
WT_API extern std::string guessImageMimeType(const std::string& file);
190 
/*! \brief Creates a data URL from binary data and a mime type.
 *
 * NOTE(review): the implementation is not visible from this header.
 * Judging by the name and arguments, this presumably returns a
 * \c data: URL that embeds \p data with the given \p mimeType --
 * confirm the exact format (e.g. Base64 encoding) against the
 * implementation.
 *
 * \param data the binary content; it is taken by non-const reference,
 *        but whether it is modified cannot be determined from this
 *        declaration
 * \param mimeType the mime type of \p data (e.g. as returned by
 *        guessImageMimeTypeData())
 *
 * \sa guessImageMimeTypeData(), base64Encode()
 */
WT_API extern std::string createDataUrl(std::vector<unsigned char>& data, std::string mimeType);
192 
/*! \brief Computes a hash-based message authentication code.
 *
 * This utility function computes a HMAC, and returns the raw
 * (binary) hash value. Takes as arguments the text to be hashed, a
 * secret key, a function pointer to a hashfunction, the internal
 * block size of the hashfunction in bytes and the size of the
 * resulting hash value the function produces. A maximum blocksize of
 * 2048 bits (256 bytes) is supported.
 *
 * \param text the text to be hashed
 * \param key the secret key
 * \param hashfunction the hash function to use; it takes the data to
 *        hash and returns the hash value as a \c std::string (e.g.
 *        md5())
 * \param blocksize the internal block size of \p hashfunction, in
 *        bytes (at most 256)
 * \param keysize the size of the hash value that \p hashfunction
 *        produces (note that, despite its name, this is described as
 *        the hash output size and not the length of \p key)
 * \return the raw (binary) HMAC value
 *
 * \sa hmac_sha1()
 * \sa hmac_md5()
 */
WT_API extern std::string hmac(const std::string& text,
                               const std::string& key,
                               std::string (*hashfunction)(const std::string&),
                               size_t blocksize,
                               size_t keysize);
210 
/*! \brief Computes a hash-based message authentication code.
 *
 * Uses the md5 hashfunction, returns a raw (binary) hash value.
 *
 * \param text the text to be hashed
 * \param key the secret key
 * \return the raw (binary) HMAC value
 *
 * \sa hmac(), md5()
 */
WT_API extern std::string hmac_md5(const std::string& text, const std::string& key);
218 
/*! \brief Computes a hash-based message authentication code.
 *
 * Uses the sha1 hashfunction, returns a raw (binary) hash value.
 *
 * \param text the text to be hashed
 * \param key the secret key
 * \return the raw (binary) HMAC value
 *
 * \sa hmac(), sha1()
 */
WT_API extern std::string hmac_sha1(const std::string& text, const std::string& key);
226 
227 }
228 
229 }
230 
// Invoked at global scope, after both namespaces have been closed.
// NOTE(review): the macro is defined outside this file; presumably it
// declares the operators needed to combine HtmlEncodingFlag values
// into a WFlags<HtmlEncodingFlag> -- confirm against its definition.
W_DECLARE_OPERATORS_FOR_FLAGS(Wt::Utils::HtmlEncodingFlag)
232 
233 #endif // WT_UTILS_H_
234