1 /*
2  * libjingle
3  * Copyright 2004--2011, Google Inc.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *  2. Redistributions in binary form must reproduce the above copyright notice,
11  *     this list of conditions and the following disclaimer in the documentation
12  *     and/or other materials provided with the distribution.
13  *  3. The name of the author may not be used to endorse or promote products
14  *     derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #ifndef TALK_BASE_STRINGENCODE_H__
29 #define TALK_BASE_STRINGENCODE_H__
30 
31 #include <string>
32 #include <sstream>
33 #include <vector>
34 
35 namespace talk_base {
36 
37 //////////////////////////////////////////////////////////////////////
38 // String Encoding Utilities
39 //////////////////////////////////////////////////////////////////////
40 
// Converts an unsigned value from 0 to 15 to the hex character equivalent...
// NOTE(review): the digit case and the result for values above 15 are decided
// by the implementation, which is not visible in this header.
char hex_encode(unsigned char val);
// ...and vice-versa: converts a hex character to its numeric value, stored
// through val.
// NOTE(review): the bool result presumably reports whether ch was a valid hex
// digit -- confirm against the implementation.
bool hex_decode(char ch, unsigned char* val);

// Converts an unsigned value to its utf8 representation.  Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decodes the utf8 encoded value pointed to by source.  Returns the number of
// bytes used by the encoding, or 0 if the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
52 
// Escaping prefixes illegal characters with the escape character.  Compact, but
// illegal characters still appear in the string.
//   buffer/buflen: destination and its capacity.
//   source/srclen: input characters.
//   illegal:       the characters that must be escaped.
//   escape:        the escape character to prefix them with.
// NOTE(review): the size_t result is presumably the number of characters
// written to buffer -- confirm against the implementation.
size_t escape(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// Reverses escape().  Note: in-place unescaping (buffer == source) is allowed.
size_t unescape(char* buffer, size_t buflen,
                const char* source, size_t srclen,
                char escape);

// Encoding replaces illegal characters with the escape character and 2 hex
// chars, so it's a little less compact than escape, but completely removes
// illegal characters.  Note that hex digits should not be used as illegal
// characters.
size_t encode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// Reverses encode().  Note: in-place decoding (buffer == source) is allowed.
size_t decode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              char escape);
74 
// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing as the 'illegal' parameter of escape or encode.
const char* unsafe_filename_characters();
78 
// url_encode is an encode operation with a predefined set of illegal characters
// and escape character (for use in URLs).
size_t url_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Decoding counterpart.  Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);
// Decoding counterpart.  Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Decoding counterpart.  Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
100 
// hex_encode shows the hex representation of binary data in ascii.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// NOTE(review): presumably the inverse of the buffer form of hex_encode (hex
// text back to binary) -- confirm against the implementation.
size_t hex_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Helper function for hex_encode that returns the result as a std::string.
std::string hex_encode(const char* source, size_t srclen);
108 
// Signature of a buffer-to-buffer string transform; the url/html/xml/hex
// encode and decode functions above all have this shape.
typedef size_t (*Transform)(char* buffer, size_t buflen,
                            const char* source, size_t srclen);

// Applies any suitable string transform (including the ones above) to an STL
// string.  Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
// NOTE(review): maxlen presumably bounds the size of that temporary space, and
// the result is presumably the transformed length -- confirm against the
// implementation.
size_t transform(std::string& value, size_t maxlen, const std::string& source,
                 Transform t);

// Returns the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);
119 
120 // Convenience wrappers
s_url_encode(const std::string & source)121 inline std::string s_url_encode(const std::string& source) {
122   return s_transform(source, url_encode);
123 }
s_url_decode(const std::string & source)124 inline std::string s_url_decode(const std::string& source) {
125   return s_transform(source, url_decode);
126 }
127 
// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter creating empty fields.
// NOTE(review): the result is presumably the number of fields -- confirm
// against the implementation.
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter ignored.  Trailing delimiter ignored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields);

// Tokenizes source and appends the tokens to fields.  Returns the new size of
// fields.
size_t tokenize_append(const std::string& source, char delimiter,
                       std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter, with
// duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
// between the start_mark and the end_mark is treated as a single field. Returns
// the size of fields. For example, if source is "filename
// \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
// the start_mark and end_mark are '"', this method returns two fields:
// "filename" and "/Library/Application Support/media content.txt".
size_t tokenize(const std::string& source, char delimiter, char start_mark,
                char end_mark, std::vector<std::string>* fields);
151 
152 // Safe sprintf to std::string
153 //void sprintf(std::string& value, size_t maxlen, const char * format, ...)
154 //     PRINTF_FORMAT(3);
155 
156 // Convert arbitrary values to/from a string.
157 
// Formats t with operator<< and stores the text in *s.  Returns false if the
// stream reported a failure while formatting; *s is assigned either way.
template <class T>
static bool ToString(const T& t, std::string* s) {
  std::ostringstream oss;
  oss << t;
  *s = oss.str();
  return !oss.fail();
}

// Parses s with operator>> into *t.  Returns false if the extraction failed.
// Only the leading token is extracted; any characters after it are not
// checked.  On failure *t may have been modified (since C++11 a failed numeric
// extraction stores zero), so callers must not rely on its previous value.
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream iss(s);
  iss >> *t;
  return !iss.fail();
}

// Inline versions of the string conversion routines.

// Returns val formatted with operator<<.  Formatting failures are not
// reported; whatever text the stream produced is returned.
template<typename T>
static inline std::string ToString(T val) {
  std::string str;
  ToString(val, &str);
  return str;
}

// Returns str parsed as a T, or a value-initialized T if parsing fails.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize: for empty or all-whitespace input the stream sentry
  // fails before anything is stored, which would otherwise leave a
  // fundamental T uninitialized and make the return undefined behavior.
  T val = T();
  FromString(str, &val);
  return val;
}

// Returns str parsed as a T, or defaultValue if parsing fails.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // A failed extraction may overwrite val (zero or a saturated value for
  // numeric types since C++11), so fall back to the default explicitly
  // instead of returning whatever the stream left behind.
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}
189 
// Simple function to strip out characters which shouldn't be used in
// filenames: takes one character and returns the character to use in its
// place.
// NOTE(review): which characters are replaced, and with what, is decided by
// the implementation, which is not visible in this header.
char make_char_safe_for_filename(char c);
193 
194 //////////////////////////////////////////////////////////////////////
195 
196 }  // namespace talk_base
197 
198 #endif  // TALK_BASE_STRINGENCODE_H__
199