1 //
2 // VMime library (http://www.vmime.org)
3 // Copyright (C) 2002-2013 Vincent Richard <vincent@vmime.org>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 3 of
8 // the License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License along
16 // with this program; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Linking this library statically or dynamically with other modules is making
20 // a combined work based on this library. Thus, the terms and conditions of
21 // the GNU General Public License cover the whole combination.
22 //
23
24 #include "vmime/utility/encoder/qpEncoder.hpp"
25 #include "vmime/parserHelpers.hpp"
26
27
28 namespace vmime {
29 namespace utility {
30 namespace encoder {
31
32
qpEncoder()33 qpEncoder::qpEncoder()
34 {
35 }
36
37
getAvailableProperties() const38 const std::vector <string> qpEncoder::getAvailableProperties() const
39 {
40 std::vector <string> list(encoder::getAvailableProperties());
41
42 list.push_back("maxlinelength");
43
44 list.push_back("text"); // if set, '\r' and '\n' are not hex-encoded.
45 // WARNING! You should not use this for binary data!
46
47 list.push_back("rfc2047"); // for header fields encoding (RFC #2047)
48
49 return (list);
50 }
51
52
53
// Hex-encoding table: maps a 4-bit value (0..15) to the matching upper-case
// hexadecimal digit. It is indexed with the high and low nibbles of the byte
// being written as "=XY" by QP_ENCODE_HEX.
const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF";
56
57
// RFC-2047 encoding table: we always encode RFC-2047 using the restricted
// charset, that is the one used for 'phrase' in From/To/Cc/... headers.
//
// " The set of characters that may be used in a "Q"-encoded 'encoded-word'
// is restricted to: <upper and lower case ASCII letters, decimal digits,
// "!", "*", "+", "-", "/", "=", and "_" (underscore, ASCII 95.)>. "
//
// Although "=" and "_" belong to that output alphabet, they carry a special
// meaning there ("=" introduces a hex sequence, "_" stands for a space), so
// a literal "=" or "_" found in the input is flagged as "encode" below.
//
// Two special cases:
// - encode space (32) as underscore (95): the space is flagged as "encode"
//   here, and the encoder writes "_" instead of a hex sequence
// - encode underscore as hex (=5F)
//
// This is a quick lookup table:
// '1' means "encode", '0' means "no encoding"
//
// The table only covers the 7-bit range (128 entries): callers test
// 'c >= 128' before indexing it, such bytes being always encoded.
//
const vmime_uint8 qpEncoder::sm_RFC2047EncodeTable[] =
{
	/*   0 NUL */ 1, /*   1 SOH */ 1, /*   2 STX */ 1, /*   3 ETX */ 1, /*   4 EOT */ 1, /*   5 ENQ */ 1,
	/*   6 ACK */ 1, /*   7 BEL */ 1, /*   8 BS  */ 1, /*   9 TAB */ 1, /*  10 LF  */ 1, /*  11 VT  */ 1,
	/*  12 FF  */ 1, /*  13 CR  */ 1, /*  14 SO  */ 1, /*  15 SI  */ 1, /*  16 DLE */ 1, /*  17 DC1 */ 1,
	/*  18 DC2 */ 1, /*  19 DC3 */ 1, /*  20 DC4 */ 1, /*  21 NAK */ 1, /*  22 SYN */ 1, /*  23 ETB */ 1,
	/*  24 CAN */ 1, /*  25 EM  */ 1, /*  26 SUB */ 1, /*  27 ESC */ 1, /*  28 FS  */ 1, /*  29 GS  */ 1,
	/*  30 RS  */ 1, /*  31 US  */ 1, /*  32 SPACE*/ 1, /*  33 !   */ 0, /*  34 "   */ 1, /*  35 #   */ 1,
	/*  36 $   */ 1, /*  37 %   */ 1, /*  38 &   */ 1, /*  39 '   */ 1, /*  40 (   */ 1, /*  41 )   */ 1,
	/*  42 *   */ 0, /*  43 +   */ 0, /*  44 ,   */ 1, /*  45 -   */ 0, /*  46 .   */ 1, /*  47 /   */ 0,
	/*  48 0   */ 0, /*  49 1   */ 0, /*  50 2   */ 0, /*  51 3   */ 0, /*  52 4   */ 0, /*  53 5   */ 0,
	/*  54 6   */ 0, /*  55 7   */ 0, /*  56 8   */ 0, /*  57 9   */ 0, /*  58 :   */ 1, /*  59 ;   */ 1,
	/*  60 <   */ 1, /*  61 =   */ 1, /*  62 >   */ 1, /*  63 ?   */ 1, /*  64 @   */ 1, /*  65 A   */ 0,
	/*  66 B   */ 0, /*  67 C   */ 0, /*  68 D   */ 0, /*  69 E   */ 0, /*  70 F   */ 0, /*  71 G   */ 0,
	/*  72 H   */ 0, /*  73 I   */ 0, /*  74 J   */ 0, /*  75 K   */ 0, /*  76 L   */ 0, /*  77 M   */ 0,
	/*  78 N   */ 0, /*  79 O   */ 0, /*  80 P   */ 0, /*  81 Q   */ 0, /*  82 R   */ 0, /*  83 S   */ 0,
	/*  84 T   */ 0, /*  85 U   */ 0, /*  86 V   */ 0, /*  87 W   */ 0, /*  88 X   */ 0, /*  89 Y   */ 0,
	/*  90 Z   */ 0, /*  91 [   */ 1, /*  92 \   */ 1, /*  93 ]   */ 1, /*  94 ^   */ 1, /*  95 _   */ 1,
	/*  96 `   */ 1, /*  97 a   */ 0, /*  98 b   */ 0, /*  99 c   */ 0, /* 100 d   */ 0, /* 101 e   */ 0,
	/* 102 f   */ 0, /* 103 g   */ 0, /* 104 h   */ 0, /* 105 i   */ 0, /* 106 j   */ 0, /* 107 k   */ 0,
	/* 108 l   */ 0, /* 109 m   */ 0, /* 110 n   */ 0, /* 111 o   */ 0, /* 112 p   */ 0, /* 113 q   */ 0,
	/* 114 r   */ 0, /* 115 s   */ 0, /* 116 t   */ 0, /* 117 u   */ 0, /* 118 v   */ 0, /* 119 w   */ 0,
	/* 120 x   */ 0, /* 121 y   */ 0, /* 122 z   */ 0, /* 123 {   */ 1, /* 124 |   */ 1, /* 125 }   */ 1,
	/* 126 ~   */ 1, /* 127 DEL */ 1
};
97
98
// Hex-decoding table: indexed by a byte value (0..255), gives the numeric
// value of the hexadecimal digit that byte represents ('0'-'9' => 0..9,
// 'A'-'F' and 'a'-'f' => 10..15).
//
// Every other byte maps to 0: a byte which is not a hexadecimal digit is
// therefore decoded exactly as if it were '0'.
const vmime_uint8 qpEncoder::sm_hexDecodeTable[256] =
{
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,        // '0' to '9'
	/* 0x40 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'A' to 'F'
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'a' to 'f'
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
119
120
121 // static
RFC2047_isEncodingNeededForChar(const byte_t c)122 bool qpEncoder::RFC2047_isEncodingNeededForChar(const byte_t c)
123 {
124 return (c >= 128 || sm_RFC2047EncodeTable[c] != 0);
125 }
126
127
128 // static
RFC2047_getEncodedLength(const byte_t c)129 int qpEncoder::RFC2047_getEncodedLength(const byte_t c)
130 {
131 if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
132 {
133 if (c == 32) // space
134 {
135 // Encoded as "_"
136 return 1;
137 }
138 else
139 {
140 // Hex encoding
141 return 3;
142 }
143 }
144 else
145 {
146 return 1; // no encoding
147 }
148 }
149
150
#ifndef VMIME_BUILDING_DOC

// Appends the 3-byte sequence "=XY" to 'outBuffer', XY being the upper-case
// hexadecimal value of the byte 'x', and advances both 'outBufferPos' and
// 'curCol' by 3. These three variables, as well as 'sm_hexDigits', must be
// visible where the macro is expanded.
//
// 'x' is evaluated twice, so it must not have side effects. It is
// parenthesized so that an expression can be passed, and the body is wrapped
// in do/while(0) so that the macro behaves as one single statement (e.g. in
// an un-braced if/else).
#define QP_ENCODE_HEX(x) \
	do \
	{ \
		outBuffer[outBufferPos] = '='; \
		outBuffer[outBufferPos + 1] = sm_hexDigits[(x) >> 4]; \
		outBuffer[outBufferPos + 2] = sm_hexDigits[(x) & 0xF]; \
		outBufferPos += 3; \
		curCol += 3; \
	} while (0)

// Writes the 'l' first bytes of buffer 'x' to the output stream 's'.
#define QP_WRITE(s, x, l) (s).write(reinterpret_cast <byte_t*>(x), (l))

#endif // VMIME_BUILDING_DOC
163
164
encode(utility::inputStream & in,utility::outputStream & out,utility::progressListener * progress)165 size_t qpEncoder::encode(utility::inputStream& in,
166 utility::outputStream& out, utility::progressListener* progress)
167 {
168 in.reset(); // may not work...
169
170 const size_t propMaxLineLength =
171 getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1));
172
173 const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false);
174 const bool text = getProperties().getProperty <bool>("text", false); // binary mode by default
175
176 const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1));
177 const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74));
178
179 // Process the data
180 byte_t buffer[16384];
181 size_t bufferLength = 0;
182 size_t bufferPos = 0;
183
184 size_t curCol = 0;
185
186 byte_t outBuffer[16384];
187 size_t outBufferPos = 0;
188
189 size_t total = 0;
190 size_t inTotal = 0;
191
192 if (progress)
193 progress->start(0);
194
195 while (bufferPos < bufferLength || !in.eof())
196 {
197 // Flush current output buffer
198 if (outBufferPos + 6 >= static_cast <int>(sizeof(outBuffer)))
199 {
200 QP_WRITE(out, outBuffer, outBufferPos);
201
202 total += outBufferPos;
203 outBufferPos = 0;
204 }
205
206 // Need to get more data?
207 if (bufferPos >= bufferLength)
208 {
209 bufferLength = in.read(buffer, sizeof(buffer));
210 bufferPos = 0;
211
212 // No more data
213 if (bufferLength == 0)
214 break;
215 }
216
217 // Get the next char and encode it
218 const byte_t c = buffer[bufferPos++];
219
220 if (rfc2047)
221 {
222 if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
223 {
224 if (c == 32) // space
225 {
226 // RFC-2047, Page 5, 4.2. The "Q" encoding:
227 // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
228 // represented as "_" (underscore, ASCII 95.). >>
229 outBuffer[outBufferPos++] = '_';
230 ++curCol;
231 }
232 else
233 {
234 // Other characters: '=' + hexadecimal encoding
235 QP_ENCODE_HEX(c);
236 }
237 }
238 else
239 {
240 // No encoding
241 outBuffer[outBufferPos++] = c;
242 ++curCol;
243 }
244 }
245 else
246 {
247 switch (c)
248 {
249 case 46: // .
250 {
251 if (curCol == 0)
252 {
253 // If a '.' appears at the beginning of a line, we encode it to
254 // to avoid problems with SMTP servers... ("\r\n.\r\n" means the
255 // end of data transmission).
256 QP_ENCODE_HEX('.');
257 continue;
258 }
259
260 outBuffer[outBufferPos++] = '.';
261 ++curCol;
262 break;
263 }
264 case 32: // space
265 {
266 // Need to get more data?
267 if (bufferPos >= bufferLength)
268 {
269 bufferLength = in.read(buffer, sizeof(buffer));
270 bufferPos = 0;
271 }
272
273 // Spaces cannot appear at the end of a line. So, encode the space.
274 if (bufferPos >= bufferLength ||
275 (buffer[bufferPos] == '\r' || buffer[bufferPos] == '\n'))
276 {
277 QP_ENCODE_HEX(' ');
278 }
279 else
280 {
281 outBuffer[outBufferPos++] = ' ';
282 ++curCol;
283 }
284
285 break;
286 }
287 case 9: // TAB
288 {
289 QP_ENCODE_HEX(c);
290 break;
291 }
292 case 13: // CR
293 case 10: // LF
294 {
295 // RFC-2045/6.7(4)
296
297 // Text data
298 if (text && !rfc2047)
299 {
300 outBuffer[outBufferPos++] = c;
301 ++curCol;
302
303 if (c == 10)
304 curCol = 0; // reset current line length
305 }
306 // Binary data
307 else
308 {
309 QP_ENCODE_HEX(c);
310 }
311
312 break;
313 }
314 case 61: // =
315 {
316 QP_ENCODE_HEX('=');
317 break;
318 }
319 /*
320 Rule #2: (Literal representation) Octets with decimal values of 33
321 through 60 inclusive, and 62 through 126, inclusive, MAY be
322 represented as the ASCII characters which correspond to those
323 octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
324 through TILDE, respectively).
325 */
326 default:
327
328 //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
329 if (c >= 33 && c <= 126 && c != 61 && c != 63)
330 {
331 outBuffer[outBufferPos++] = c;
332 ++curCol;
333 }
334 // Other characters: '=' + hexadecimal encoding
335 else
336 {
337 QP_ENCODE_HEX(c);
338 }
339
340 break;
341
342 } // switch (c)
343
344 // Soft line break : "=\r\n"
345 if (cutLines && curCol >= maxLineLength - 1)
346 {
347 outBuffer[outBufferPos] = '=';
348 outBuffer[outBufferPos + 1] = '\r';
349 outBuffer[outBufferPos + 2] = '\n';
350
351 outBufferPos += 3;
352 curCol = 0;
353 }
354
355 } // !rfc2047
356
357 ++inTotal;
358
359 if (progress)
360 progress->progress(inTotal, inTotal);
361 }
362
363 // Flush remaining output buffer
364 if (outBufferPos != 0)
365 {
366 QP_WRITE(out, outBuffer, outBufferPos);
367 total += outBufferPos;
368 }
369
370 if (progress)
371 progress->stop(inTotal);
372
373 return (total);
374 }
375
376
decode(utility::inputStream & in,utility::outputStream & out,utility::progressListener * progress)377 size_t qpEncoder::decode(utility::inputStream& in,
378 utility::outputStream& out, utility::progressListener* progress)
379 {
380 in.reset(); // may not work...
381
382 // Process the data
383 const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false);
384
385 byte_t buffer[16384];
386 size_t bufferLength = 0;
387 size_t bufferPos = 0;
388
389 byte_t outBuffer[16384];
390 size_t outBufferPos = 0;
391
392 size_t total = 0;
393 size_t inTotal = 0;
394
395 while (bufferPos < bufferLength || !in.eof())
396 {
397 // Flush current output buffer
398 if (outBufferPos >= sizeof(outBuffer))
399 {
400 QP_WRITE(out, outBuffer, outBufferPos);
401
402 total += outBufferPos;
403 outBufferPos = 0;
404 }
405
406 // Need to get more data?
407 if (bufferPos >= bufferLength)
408 {
409 bufferLength = in.read(buffer, sizeof(buffer));
410 bufferPos = 0;
411
412 // No more data
413 if (bufferLength == 0)
414 break;
415 }
416
417 // Decode the next sequence (hex-encoded byte or printable character)
418 byte_t c = buffer[bufferPos++];
419
420 ++inTotal;
421
422 switch (c)
423 {
424 case '=':
425 {
426 if (bufferPos >= bufferLength)
427 {
428 bufferLength = in.read(buffer, sizeof(buffer));
429 bufferPos = 0;
430 }
431
432 if (bufferPos < bufferLength)
433 {
434 c = buffer[bufferPos++];
435
436 ++inTotal;
437
438 switch (c)
439 {
440 // Ignore soft line break ("=\r\n" or "=\n")
441 case '\r':
442
443 // Read one byte more
444 if (bufferPos >= bufferLength)
445 {
446 bufferLength = in.read(buffer, sizeof(buffer));
447 bufferPos = 0;
448 }
449
450 if (bufferPos < bufferLength)
451 {
452 ++bufferPos;
453 ++inTotal;
454 }
455
456 break;
457
458 case '\n':
459
460 break;
461
462 // Hex-encoded char
463 default:
464 {
465 // We need another byte...
466 if (bufferPos >= bufferLength)
467 {
468 bufferLength = in.read(buffer, sizeof(buffer));
469 bufferPos = 0;
470 }
471
472 if (bufferPos < bufferLength)
473 {
474 const byte_t next = buffer[bufferPos++];
475
476 ++inTotal;
477
478 const byte_t value = static_cast <byte_t>
479 (sm_hexDecodeTable[c] * 16 + sm_hexDecodeTable[next]);
480
481 outBuffer[outBufferPos++] = value;
482 }
483 else
484 {
485 // Premature end-of-data
486 }
487
488 break;
489 }
490
491 }
492 }
493 else
494 {
495 // Premature end-of-data
496 }
497
498 break;
499 }
500 case '_':
501 {
502 if (rfc2047)
503 {
504 // RFC-2047, Page 5, 4.2. The "Q" encoding:
505 // << Note that the "_" always represents hexadecimal 20, even if the SPACE
506 // character occupies a different code position in the character set in use. >>
507 outBuffer[outBufferPos++] = 0x20;
508 break;
509 }
510
511 // no break here...
512 }
513 default:
514 {
515 outBuffer[outBufferPos++] = c;
516 }
517
518 }
519
520 if (progress)
521 progress->progress(inTotal, inTotal);
522 }
523
524 // Flush remaining output buffer
525 if (outBufferPos != 0)
526 {
527 QP_WRITE(out, outBuffer, outBufferPos);
528 total += outBufferPos;
529 }
530
531 if (progress)
532 progress->stop(inTotal);
533
534 return (total);
535 }
536
537
getEncodedSize(const size_t n) const538 size_t qpEncoder::getEncodedSize(const size_t n) const
539 {
540 const size_t propMaxLineLength =
541 getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1));
542
543 const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1));
544 const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74));
545
546 // Worst cast: 1 byte of input provide 3 bytes of output
547 // Count CRLF (2 bytes) for each line.
548 return n * 3 + (cutLines ? (n / maxLineLength) * 2 : 0);
549 }
550
551
getDecodedSize(const size_t n) const552 size_t qpEncoder::getDecodedSize(const size_t n) const
553 {
554 // Worst case: 1 byte of input equals 1 byte of output
555 return n;
556 }
557
558
559 } // encoder
560 } // utility
561 } // vmime
562