1 //
2 // VMime library (http://www.vmime.org)
3 // Copyright (C) 2002-2013 Vincent Richard <vincent@vmime.org>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 3 of
8 // the License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License along
16 // with this program; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Linking this library statically or dynamically with other modules is making
20 // a combined work based on this library.  Thus, the terms and conditions of
21 // the GNU General Public License cover the whole combination.
22 //
23 
24 #include "vmime/utility/encoder/qpEncoder.hpp"
25 #include "vmime/parserHelpers.hpp"
26 
27 
28 namespace vmime {
29 namespace utility {
30 namespace encoder {
31 
32 
qpEncoder()33 qpEncoder::qpEncoder()
34 {
35 }
36 
37 
getAvailableProperties() const38 const std::vector <string> qpEncoder::getAvailableProperties() const
39 {
40 	std::vector <string> list(encoder::getAvailableProperties());
41 
42 	list.push_back("maxlinelength");
43 
44 	list.push_back("text");  // if set, '\r' and '\n' are not hex-encoded.
45 	                         // WARNING! You should not use this for binary data!
46 
47 	list.push_back("rfc2047");   // for header fields encoding (RFC #2047)
48 
49 	return (list);
50 }
51 
52 
53 
54 // Hex-encoding table
55 const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF";
56 
57 
58 // RFC-2047 encoding table: we always encode RFC-2047 using the restricted
59 // charset, that is the one used for 'phrase' in From/To/Cc/... headers.
60 //
61 // " The set of characters that may be used in a "Q"-encoded 'encoded-word'
62 //   is restricted to: <upper and lower case ASCII letters, decimal digits,
63 //   "!", "*", "+", "-", "/", "=", and "_" (underscore, ASCII 95.)>. "
64 //
65 // Two special cases:
66 // - encode space (32) as underscore (95)
67 // - encode underscore as hex (=5F)
68 //
69 // This is a quick lookup table:
70 //   '1' means "encode", '0' means "no encoding"
71 //
72 const vmime_uint8 qpEncoder::sm_RFC2047EncodeTable[] =
73 {
74 	/*   0  NUL */ 1, /*   1  SOH */ 1, /*   2  STX */ 1, /*   3  ETX */ 1, /*   4  EOT */ 1, /*   5  ENQ */ 1,
75 	/*   6  ACK */ 1, /*   7  BEL */ 1, /*   8   BS */ 1, /*   9  TAB */ 1, /*  10   LF */ 1, /*  11   VT */ 1,
76 	/*  12   FF */ 1, /*  13   CR */ 1, /*  14   SO */ 1, /*  15   SI */ 1, /*  16  DLE */ 1, /*  17  DC1 */ 1,
77 	/*  18  DC2 */ 1, /*  19  DC3 */ 1, /*  20  DC4 */ 1, /*  21  NAK */ 1, /*  22  SYN */ 1, /*  23  ETB */ 1,
78 	/*  24  CAN */ 1, /*  25   EM */ 1, /*  26  SUB */ 1, /*  27  ESC */ 1, /*  28   FS */ 1, /*  29   GS */ 1,
79 	/*  30   RS */ 1, /*  31   US */ 1, /*  32 SPACE*/ 1, /*  33    ! */ 0, /*  34    " */ 1, /*  35    # */ 1,
80 	/*  36    $ */ 1, /*  37    % */ 1, /*  38    & */ 1, /*  39    ' */ 1, /*  40    ( */ 1, /*  41    ) */ 1,
81 	/*  42    * */ 0, /*  43    + */ 0, /*  44    , */ 1, /*  45    - */ 0, /*  46    . */ 1, /*  47    / */ 0,
82 	/*  48    0 */ 0, /*  49    1 */ 0, /*  50    2 */ 0, /*  51    3 */ 0, /*  52    4 */ 0, /*  53    5 */ 0,
83 	/*  54    6 */ 0, /*  55    7 */ 0, /*  56    8 */ 0, /*  57    9 */ 0, /*  58    : */ 1, /*  59    ; */ 1,
84 	/*  60    < */ 1, /*  61    = */ 1, /*  62    > */ 1, /*  63    ? */ 1, /*  64    @ */ 1, /*  65    A */ 0,
85 	/*  66    B */ 0, /*  67    C */ 0, /*  68    D */ 0, /*  69    E */ 0, /*  70    F */ 0, /*  71    G */ 0,
86 	/*  72    H */ 0, /*  73    I */ 0, /*  74    J */ 0, /*  75    K */ 0, /*  76    L */ 0, /*  77    M */ 0,
87 	/*  78    N */ 0, /*  79    O */ 0, /*  80    P */ 0, /*  81    Q */ 0, /*  82    R */ 0, /*  83    S */ 0,
88 	/*  84    T */ 0, /*  85    U */ 0, /*  86    V */ 0, /*  87    W */ 0, /*  88    X */ 0, /*  89    Y */ 0,
89 	/*  90    Z */ 0, /*  91    [ */ 1, /*  92    " */ 1, /*  93    ] */ 1, /*  94    ^ */ 1, /*  95    _ */ 1,
90 	/*  96    ` */ 1, /*  97    a */ 0, /*  98    b */ 0, /*  99    c */ 0, /* 100    d */ 0, /* 101    e */ 0,
91 	/* 102    f */ 0, /* 103    g */ 0, /* 104    h */ 0, /* 105    i */ 0, /* 106    j */ 0, /* 107    k */ 0,
92 	/* 108    l */ 0, /* 109    m */ 0, /* 110    n */ 0, /* 111    o */ 0, /* 112    p */ 0, /* 113    q */ 0,
93 	/* 114    r */ 0, /* 115    s */ 0, /* 116    t */ 0, /* 117    u */ 0, /* 118    v */ 0, /* 119    w */ 0,
94 	/* 120    x */ 0, /* 121    y */ 0, /* 122    z */ 0, /* 123    { */ 1, /* 124    | */ 1, /* 125    } */ 1,
95 	/* 126    ~ */ 1, /* 127  DEL */ 1
96 };
97 
98 
99 // Hex-decoding table
100 const vmime_uint8 qpEncoder::sm_hexDecodeTable[256] =
101 {
102 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
103 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
104 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
105 	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
106 	 0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
107 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
108 	 0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,
109 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
110 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
111 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
112 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
113 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
114 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
115 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
116 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
117 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
118 };
119 
120 
121 // static
RFC2047_isEncodingNeededForChar(const byte_t c)122 bool qpEncoder::RFC2047_isEncodingNeededForChar(const byte_t c)
123 {
124 	return (c >= 128 || sm_RFC2047EncodeTable[c] != 0);
125 }
126 
127 
128 // static
RFC2047_getEncodedLength(const byte_t c)129 int qpEncoder::RFC2047_getEncodedLength(const byte_t c)
130 {
131 	if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
132 	{
133 		if (c == 32)  // space
134 		{
135 			// Encoded as "_"
136 			return 1;
137 		}
138 		else
139 		{
140 			// Hex encoding
141 			return 3;
142 		}
143 	}
144 	else
145 	{
146 		return 1;  // no encoding
147 	}
148 }
149 
150 
#ifndef VMIME_BUILDING_DOC

// Append the "=XX" hex triplet for byte value 'x' to 'outBuffer' and advance
// both 'outBufferPos' and 'curCol' by 3. The macro relies on 'outBuffer',
// 'outBufferPos', 'curCol' and 'sm_hexDigits' being in scope at the call site.
//
// The body is wrapped in do { } while (0) so that the macro behaves as a
// single statement (e.g. in an unbraced if/else), and 'x' is parenthesized so
// that an expression argument is not split by operator precedence. Note that
// 'x' is evaluated twice: do not pass an expression with side effects.
#define QP_ENCODE_HEX(x) \
	do \
	{ \
		outBuffer[outBufferPos] = '=';                         \
		outBuffer[outBufferPos + 1] = sm_hexDigits[(x) >> 4];  \
		outBuffer[outBufferPos + 2] = sm_hexDigits[(x) & 0xF]; \
		outBufferPos += 3;                                     \
		curCol += 3;                                           \
	} while (0)

// Write the first 'l' bytes of buffer 'x' to output stream 's'.
#define QP_WRITE(s, x, l) (s).write(reinterpret_cast <byte_t*>(x), (l))

#endif // VMIME_BUILDING_DOC
163 
164 
encode(utility::inputStream & in,utility::outputStream & out,utility::progressListener * progress)165 size_t qpEncoder::encode(utility::inputStream& in,
166 	utility::outputStream& out, utility::progressListener* progress)
167 {
168 	in.reset();  // may not work...
169 
170 	const size_t propMaxLineLength =
171 		getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1));
172 
173 	const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false);
174 	const bool text = getProperties().getProperty <bool>("text", false);  // binary mode by default
175 
176 	const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1));
177 	const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74));
178 
179 	// Process the data
180 	byte_t buffer[16384];
181 	size_t bufferLength = 0;
182 	size_t bufferPos = 0;
183 
184 	size_t curCol = 0;
185 
186 	byte_t outBuffer[16384];
187 	size_t outBufferPos = 0;
188 
189 	size_t total = 0;
190 	size_t inTotal = 0;
191 
192 	if (progress)
193 		progress->start(0);
194 
195 	while (bufferPos < bufferLength || !in.eof())
196 	{
197 		// Flush current output buffer
198 		if (outBufferPos + 6 >= static_cast <int>(sizeof(outBuffer)))
199 		{
200 			QP_WRITE(out, outBuffer, outBufferPos);
201 
202 			total += outBufferPos;
203 			outBufferPos = 0;
204 		}
205 
206 		// Need to get more data?
207 		if (bufferPos >= bufferLength)
208 		{
209 			bufferLength = in.read(buffer, sizeof(buffer));
210 			bufferPos = 0;
211 
212 			// No more data
213 			if (bufferLength == 0)
214 				break;
215 		}
216 
217 		// Get the next char and encode it
218 		const byte_t c = buffer[bufferPos++];
219 
220 		if (rfc2047)
221 		{
222 			if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
223 			{
224 				if (c == 32)  // space
225 				{
226 					// RFC-2047, Page 5, 4.2. The "Q" encoding:
227 					// << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
228 					// represented as "_" (underscore, ASCII 95.). >>
229 					outBuffer[outBufferPos++] = '_';
230 					++curCol;
231 				}
232 				else
233 				{
234 					// Other characters: '=' + hexadecimal encoding
235 					QP_ENCODE_HEX(c);
236 				}
237 			}
238 			else
239 			{
240 				// No encoding
241 				outBuffer[outBufferPos++] = c;
242 				++curCol;
243 			}
244 		}
245 		else
246 		{
247 			switch (c)
248 			{
249 			case 46:  // .
250 			{
251 				if (curCol == 0)
252 				{
253 					// If a '.' appears at the beginning of a line, we encode it to
254 					// to avoid problems with SMTP servers... ("\r\n.\r\n" means the
255 					// end of data transmission).
256 					QP_ENCODE_HEX('.');
257 					continue;
258 				}
259 
260 				outBuffer[outBufferPos++] = '.';
261 				++curCol;
262 				break;
263 			}
264 			case 32:  // space
265 			{
266 				// Need to get more data?
267 				if (bufferPos >= bufferLength)
268 				{
269 					bufferLength = in.read(buffer, sizeof(buffer));
270 					bufferPos = 0;
271 				}
272 
273 				// Spaces cannot appear at the end of a line. So, encode the space.
274 				if (bufferPos >= bufferLength ||
275 				    (buffer[bufferPos] == '\r' || buffer[bufferPos] == '\n'))
276 				{
277 					QP_ENCODE_HEX(' ');
278 				}
279 				else
280 				{
281 					outBuffer[outBufferPos++] = ' ';
282 					++curCol;
283 				}
284 
285 				break;
286 			}
287 			case 9:   // TAB
288 			{
289 				QP_ENCODE_HEX(c);
290 				break;
291 			}
292 			case 13:  // CR
293 			case 10:  // LF
294 			{
295 				// RFC-2045/6.7(4)
296 
297 				// Text data
298 				if (text && !rfc2047)
299 				{
300 					outBuffer[outBufferPos++] = c;
301 					++curCol;
302 
303 					if (c == 10)
304 						curCol = 0;  // reset current line length
305 				}
306 				// Binary data
307 				else
308 				{
309 					QP_ENCODE_HEX(c);
310 				}
311 
312 				break;
313 			}
314 			case 61:  // =
315 			{
316 				QP_ENCODE_HEX('=');
317 				break;
318 			}
319 			/*
320 				Rule #2: (Literal representation) Octets with decimal values of 33
321 				through 60 inclusive, and 62 through 126, inclusive, MAY be
322 				represented as the ASCII characters which correspond to those
323 				octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
324 				through TILDE, respectively).
325 			*/
326 			default:
327 
328 				//if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
329 				if (c >= 33 && c <= 126 && c != 61 && c != 63)
330 				{
331 					outBuffer[outBufferPos++] = c;
332 					++curCol;
333 				}
334 				// Other characters: '=' + hexadecimal encoding
335 				else
336 				{
337 					QP_ENCODE_HEX(c);
338 				}
339 
340 				break;
341 
342 			} // switch (c)
343 
344 			// Soft line break : "=\r\n"
345 			if (cutLines && curCol >= maxLineLength - 1)
346 			{
347 				outBuffer[outBufferPos] = '=';
348 				outBuffer[outBufferPos + 1] = '\r';
349 				outBuffer[outBufferPos + 2] = '\n';
350 
351 				outBufferPos += 3;
352 				curCol = 0;
353 			}
354 
355 		} // !rfc2047
356 
357 		++inTotal;
358 
359 		if (progress)
360 			progress->progress(inTotal, inTotal);
361 	}
362 
363 	// Flush remaining output buffer
364 	if (outBufferPos != 0)
365 	{
366 		QP_WRITE(out, outBuffer, outBufferPos);
367 		total += outBufferPos;
368 	}
369 
370 	if (progress)
371 		progress->stop(inTotal);
372 
373 	return (total);
374 }
375 
376 
decode(utility::inputStream & in,utility::outputStream & out,utility::progressListener * progress)377 size_t qpEncoder::decode(utility::inputStream& in,
378 	utility::outputStream& out, utility::progressListener* progress)
379 {
380 	in.reset();  // may not work...
381 
382 	// Process the data
383 	const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false);
384 
385 	byte_t buffer[16384];
386 	size_t bufferLength = 0;
387 	size_t bufferPos = 0;
388 
389 	byte_t outBuffer[16384];
390 	size_t outBufferPos = 0;
391 
392 	size_t total = 0;
393 	size_t inTotal = 0;
394 
395 	while (bufferPos < bufferLength || !in.eof())
396 	{
397 		// Flush current output buffer
398 		if (outBufferPos >= sizeof(outBuffer))
399 		{
400 			QP_WRITE(out, outBuffer, outBufferPos);
401 
402 			total += outBufferPos;
403 			outBufferPos = 0;
404 		}
405 
406 		// Need to get more data?
407 		if (bufferPos >= bufferLength)
408 		{
409 			bufferLength = in.read(buffer, sizeof(buffer));
410 			bufferPos = 0;
411 
412 			// No more data
413 			if (bufferLength == 0)
414 				break;
415 		}
416 
417 		// Decode the next sequence (hex-encoded byte or printable character)
418 		byte_t c = buffer[bufferPos++];
419 
420 		++inTotal;
421 
422 		switch (c)
423 		{
424 		case '=':
425 		{
426 			if (bufferPos >= bufferLength)
427 			{
428 				bufferLength = in.read(buffer, sizeof(buffer));
429 				bufferPos = 0;
430 			}
431 
432 			if (bufferPos < bufferLength)
433 			{
434 				c = buffer[bufferPos++];
435 
436 				++inTotal;
437 
438 				switch (c)
439 				{
440 				// Ignore soft line break ("=\r\n" or "=\n")
441 				case '\r':
442 
443 					// Read one byte more
444 					if (bufferPos >= bufferLength)
445 					{
446 						bufferLength = in.read(buffer, sizeof(buffer));
447 						bufferPos = 0;
448 					}
449 
450 					if (bufferPos < bufferLength)
451 					{
452 						++bufferPos;
453 						++inTotal;
454 					}
455 
456 					break;
457 
458 				case '\n':
459 
460 					break;
461 
462 				// Hex-encoded char
463 				default:
464 				{
465 					// We need another byte...
466 					if (bufferPos >= bufferLength)
467 					{
468 						bufferLength = in.read(buffer, sizeof(buffer));
469 						bufferPos = 0;
470 					}
471 
472 					if (bufferPos < bufferLength)
473 					{
474 						const byte_t next = buffer[bufferPos++];
475 
476 						++inTotal;
477 
478 						const byte_t value = static_cast <byte_t>
479 							(sm_hexDecodeTable[c] * 16 + sm_hexDecodeTable[next]);
480 
481 						outBuffer[outBufferPos++] = value;
482 					}
483 					else
484 					{
485 						// Premature end-of-data
486 					}
487 
488 					break;
489 				}
490 
491 				}
492 			}
493 			else
494 			{
495 				// Premature end-of-data
496 			}
497 
498 			break;
499 		}
500 		case '_':
501 		{
502 			if (rfc2047)
503 			{
504 				// RFC-2047, Page 5, 4.2. The "Q" encoding:
505 				// << Note that the "_" always represents hexadecimal 20, even if the SPACE
506 				// character occupies a different code position in the character set in use. >>
507 				outBuffer[outBufferPos++] = 0x20;
508 				break;
509 			}
510 
511 			// no break here...
512 		}
513 		default:
514 		{
515 			outBuffer[outBufferPos++] = c;
516 		}
517 
518 		}
519 
520 		if (progress)
521 			progress->progress(inTotal, inTotal);
522 	}
523 
524 	// Flush remaining output buffer
525 	if (outBufferPos != 0)
526 	{
527 		QP_WRITE(out, outBuffer, outBufferPos);
528 		total += outBufferPos;
529 	}
530 
531 	if (progress)
532 		progress->stop(inTotal);
533 
534 	return (total);
535 }
536 
537 
getEncodedSize(const size_t n) const538 size_t qpEncoder::getEncodedSize(const size_t n) const
539 {
540 	const size_t propMaxLineLength =
541 		getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1));
542 
543 	const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1));
544 	const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74));
545 
546 	// Worst cast: 1 byte of input provide 3 bytes of output
547 	// Count CRLF (2 bytes) for each line.
548 	return n * 3 + (cutLines ? (n / maxLineLength) * 2 : 0);
549 }
550 
551 
getDecodedSize(const size_t n) const552 size_t qpEncoder::getDecodedSize(const size_t n) const
553 {
554 	// Worst case: 1 byte of input equals 1 byte of output
555 	return n;
556 }
557 
558 
559 } // encoder
560 } // utility
561 } // vmime
562