1 /*
2 * libjingle
3 * Copyright 2004--2011, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifndef TALK_BASE_STRINGENCODE_H__
29 #define TALK_BASE_STRINGENCODE_H__
30
31 #include <string>
32 #include <sstream>
33 #include <vector>
34
35 namespace talk_base {
36
37 //////////////////////////////////////////////////////////////////////
38 // String Encoding Utilities
39 //////////////////////////////////////////////////////////////////////
40
41 // Convert an unsigned value from 0 to 15 to the hex character equivalent...
42 char hex_encode(unsigned char val);
43 // ...and vice-versa.
44 bool hex_decode(char ch, unsigned char* val);
45
46 // Convert an unsigned value to its utf8 representation. Returns the length
47 // of the encoded string, or 0 if the encoding is longer than buflen - 1.
48 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
49 // Decode the utf8 encoded value pointed to by source. Returns the number of
50 // bytes used by the encoding, or 0 if the encoding is invalid.
51 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
52
53 // Escaping prefixes illegal characters with the escape character. Compact, but
54 // illegal characters still appear in the string.
55 size_t escape(char * buffer, size_t buflen,
56 const char * source, size_t srclen,
57 const char * illegal, char escape);
58 // Note: in-place unescaping (buffer == source) is allowed.
59 size_t unescape(char * buffer, size_t buflen,
60 const char * source, size_t srclen,
61 char escape);
62
63 // Encoding replaces illegal characters with the escape character and 2 hex
64 // chars, so it's a little less compact than escape, but completely removes
65 // illegal characters. Note that hex digits should not be used as illegal
66 // characters.
67 size_t encode(char * buffer, size_t buflen,
68 const char * source, size_t srclen,
69 const char * illegal, char escape);
70 // Note: in-place decoding (buffer == source) is allowed.
71 size_t decode(char * buffer, size_t buflen,
72 const char * source, size_t srclen,
73 char escape);
74
75 // Returns a list of characters that may be unsafe for use in the name of a
76 // file, suitable for passing to the 'illegal' member of escape or encode.
77 const char* unsafe_filename_characters();
78
79 // url_encode is an encode operation with a predefined set of illegal characters
80 // and escape character (for use in URLs, obviously).
81 size_t url_encode(char * buffer, size_t buflen,
82 const char * source, size_t srclen);
83 // Note: in-place decoding (buffer == source) is allowed.
84 size_t url_decode(char * buffer, size_t buflen,
85 const char * source, size_t srclen);
86
87 // html_encode prevents data embedded in html from containing markup.
88 size_t html_encode(char * buffer, size_t buflen,
89 const char * source, size_t srclen);
90 // Note: in-place decoding (buffer == source) is allowed.
91 size_t html_decode(char * buffer, size_t buflen,
92 const char * source, size_t srclen);
93
94 // xml_encode makes data suitable for inside xml attributes and values.
95 size_t xml_encode(char * buffer, size_t buflen,
96 const char * source, size_t srclen);
97 // Note: in-place decoding (buffer == source) is allowed.
98 size_t xml_decode(char * buffer, size_t buflen,
99 const char * source, size_t srclen);
100
101 // hex_encode shows the hex representation of binary data in ascii.
102 size_t hex_encode(char * buffer, size_t buflen,
103 const char * source, size_t srclen);
104 size_t hex_decode(char * buffer, size_t buflen,
105 const char * source, size_t srclen);
106 // Helper function for hex_encode.
107 std::string hex_encode(const char * source, size_t srclen);
108
109 // Apply any suitable string transform (including the ones above) to an STL
110 // string. Stack-allocated temporary space is used for the transformation,
111 // so value and source may refer to the same string.
112 typedef size_t (*Transform)(char * buffer, size_t buflen,
113 const char * source, size_t srclen);
114 size_t transform(std::string& value, size_t maxlen, const std::string& source,
115 Transform t);
116
117 // Return the result of applying transform t to source.
118 std::string s_transform(const std::string& source, Transform t);
119
120 // Convenience wrappers
s_url_encode(const std::string & source)121 inline std::string s_url_encode(const std::string& source) {
122 return s_transform(source, url_encode);
123 }
s_url_decode(const std::string & source)124 inline std::string s_url_decode(const std::string& source) {
125 return s_transform(source, url_decode);
126 }
127
128 // Splits the source string into multiple fields separated by delimiter,
129 // with duplicates of delimiter creating empty fields.
130 size_t split(const std::string& source, char delimiter,
131 std::vector<std::string>* fields);
132
133 // Splits the source string into multiple fields separated by delimiter,
134 // with duplicates of delimiter ignored. Trailing delimiter ignored.
135 size_t tokenize(const std::string& source, char delimiter,
136 std::vector<std::string>* fields);
137
138 // Tokenize and append the tokens to fields. Return the new size of fields.
139 size_t tokenize_append(const std::string& source, char delimiter,
140 std::vector<std::string>* fields);
141
142 // Splits the source string into multiple fields separated by delimiter, with
143 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
144 // between the start_mark and the end_mark is treated as a single field. Return
145 // the size of fields. For example, if source is "filename
146 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
147 // the start_mark and end_mark are '"', this method returns two fields:
148 // "filename" and "/Library/Application Support/media content.txt".
149 size_t tokenize(const std::string& source, char delimiter, char start_mark,
150 char end_mark, std::vector<std::string>* fields);
151
152 // Safe sprintf to std::string
153 //void sprintf(std::string& value, size_t maxlen, const char * format, ...)
154 // PRINTF_FORMAT(3);
155
156 // Convert arbitrary values to/from a string.
157
// Streams |t| into a std::ostringstream and stores the produced text in |*s|.
// Returns true unless the stream's fail state is set after the insertion.
// |s| is dereferenced unconditionally, so it must not be null; it is assigned
// even when false is returned.
template <class T>
static bool ToString(const T &t, std::string* s) {
  std::ostringstream oss;
  oss << t;
  *s = oss.str();
  return !oss.fail();
}

// Extracts one value from |s| into |*t| using operator>> on a
// std::istringstream. Returns true unless the stream's fail state is set
// after the extraction. Input left over after the extracted value is not
// treated as an error. |t| is dereferenced unconditionally, so it must not be
// null.
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream iss(s);
  iss >> *t;
  return !iss.fail();
}

// Inline versions of the string conversion routines.

// Returns |val| formatted by the two-argument ToString. The success flag is
// discarded, so a formatting failure is not reported to the caller.
template<typename T>
static inline std::string ToString(T val) {
  std::string str;
  ToString(val, &str);
  return str;
}

// Returns the value parsed from |str| by the two-argument FromString. The
// success flag is discarded, so a failed parse cannot be told apart from a
// successfully parsed value.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize the result: a failed extraction (for example on an empty
  // string) can leave its target untouched, and returning an uninitialized
  // built-in would be undefined behavior.
  T val = T();
  FromString(str, &val);
  return val;
}

// Returns the value parsed from |str|, or |defaultValue| if parsing fails.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // The parse result must be checked: a failed numeric extraction may
  // overwrite |val| (with zero), so |val| cannot be trusted to still hold the
  // default after a failure.
  return FromString(str, &val) ? val : defaultValue;
}
189
190 // simple function to strip out characters which shouldn't be
191 // used in filenames
192 char make_char_safe_for_filename(char c);
193
194 //////////////////////////////////////////////////////////////////////
195
196 } // namespace talk_base
197
198 #endif // TALK_BASE_STRINGENCODE_H__
199