1 //
2 // VMime library (http://www.vmime.org)
3 // Copyright (C) 2002-2013 Vincent Richard <vincent@vmime.org>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 3 of
8 // the License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License along
16 // with this program; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Linking this library statically or dynamically with other modules is making
20 // a combined work based on this library.  Thus, the terms and conditions of
21 // the GNU General Public License cover the whole combination.
22 //
23 
24 #include "vmime/parameter.hpp"
25 #include "vmime/parserHelpers.hpp"
26 
27 #include "vmime/text.hpp"
28 #include "vmime/encoding.hpp"
29 
30 #include "vmime/utility/outputStreamAdapter.hpp"
31 #include "vmime/utility/outputStreamStringAdapter.hpp"
32 
33 
34 namespace vmime
35 {
36 
37 
parameter(const string & name)38 parameter::parameter(const string& name)
39 	: m_name(name), m_value(make_shared <word>())
40 {
41 }
42 
43 
parameter(const string & name,const word & value)44 parameter::parameter(const string& name, const word& value)
45 	: m_name(name), m_value(make_shared <word>(value))
46 {
47 }
48 
49 
parameter(const string & name,const string & value)50 parameter::parameter(const string& name, const string& value)
51 	: m_name(name), m_value(make_shared <word>(value))
52 {
53 }
54 
55 
// Copy constructor.
//
// Performs a deep copy: the new parameter gets the same name and its
// own word object holding a copy of the other parameter's value.
// Previously neither member was initialized from 'other', which left
// m_value null although every other member function dereferences it.
parameter::parameter(const parameter& other)
	: component(),
	  m_name(other.m_name),
	  m_value(make_shared <word>(*other.m_value))
{
}
60 
61 
clone() const62 shared_ptr <component> parameter::clone() const
63 {
64 	shared_ptr <parameter> p = make_shared <parameter>(m_name);
65 	p->copyFrom(*this);
66 
67 	return (p);
68 }
69 
70 
copyFrom(const component & other)71 void parameter::copyFrom(const component& other)
72 {
73 	const parameter& param = dynamic_cast <const parameter&>(other);
74 
75 	m_name = param.m_name;
76 	m_value->copyFrom(*param.m_value);
77 }
78 
79 
// Assignment: delegates to copyFrom() and returns this object.
parameter& parameter::operator=(const parameter& other)
{
	this->copyFrom(other);

	return *this;
}
85 
86 
getName() const87 const string& parameter::getName() const
88 {
89 	return m_name;
90 }
91 
92 
getValue() const93 const word& parameter::getValue() const
94 {
95 	return *m_value;
96 }
97 
98 
setValue(const component & value)99 void parameter::setValue(const component& value)
100 {
101 	std::ostringstream oss;
102 	utility::outputStreamAdapter vos(oss);
103 
104 	value.generate(vos);
105 
106 	setValue(word(oss.str(), vmime::charsets::US_ASCII));
107 }
108 
109 
setValue(const word & value)110 void parameter::setValue(const word& value)
111 {
112 	*m_value = value;
113 }
114 
115 
parseImpl(const parsingContext & ctx,const string & buffer,const size_t position,const size_t end,size_t * newPosition)116 void parameter::parseImpl
117 	(const parsingContext& ctx, const string& buffer, const size_t position,
118 	 const size_t end, size_t* newPosition)
119 {
120 	m_value->setBuffer(string(buffer.begin() + position, buffer.begin() + end));
121 
122 	if (ctx.getInternationalizedEmailSupport())
123 		m_value->setCharset(charset(charsets::UTF_8));
124 	else
125 		m_value->setCharset(charset(charsets::US_ASCII));
126 
127 	if (newPosition)
128 		*newPosition = end;
129 }
130 
131 
parse(const parsingContext & ctx,const std::vector<valueChunk> & chunks)132 void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>& chunks)
133 {
134 	bool foundCharsetChunk = false;
135 
136 	charset ch(charsets::US_ASCII);
137 	string lang;
138 
139 	std::ostringstream value;
140 	value.imbue(std::locale::classic());
141 
142 	for (std::vector <valueChunk>::size_type i = 0 ; i < chunks.size() ; ++i)
143 	{
144 		const valueChunk& chunk = chunks[i];
145 
146 		// Decode following data
147 		if (chunk.encoded)
148 		{
149 			const size_t len = chunk.data.length();
150 			size_t pos = 0;
151 
152 			// If this is the first encoded chunk, extract charset
153 			// and language information
154 			if (!foundCharsetChunk)
155 			{
156 				// Eg. "us-ascii'en'This%20is%20even%20more%20"
157 				size_t q = chunk.data.find_first_of('\'');
158 
159 				if (q != string::npos)
160 				{
161 					const string chs = chunk.data.substr(0, q);
162 
163 					if (!chs.empty())
164 						ch = charset(chs);
165 
166 					++q;
167 					pos = q;
168 				}
169 
170 				q = chunk.data.find_first_of('\'', pos);
171 
172 				if (q != string::npos)
173 				{
174 					// Extract language
175 					lang = chunk.data.substr(pos, q - pos);
176 
177 					++q;
178 					pos = q;
179 				}
180 
181 				foundCharsetChunk = true;
182 			}
183 
184 			for (size_t i = pos ; i < len ; ++i)
185 			{
186 				const char c = chunk.data[i];
187 
188 				if (c == '%' && i + 2 < len)
189 				{
190 					unsigned int v = 0;
191 
192 					// First char
193 					switch (chunk.data[i + 1])
194 					{
195 					case 'a': case 'A': v += 10; break;
196 					case 'b': case 'B': v += 11; break;
197 					case 'c': case 'C': v += 12; break;
198 					case 'd': case 'D': v += 13; break;
199 					case 'e': case 'E': v += 14; break;
200 					case 'f': case 'F': v += 15; break;
201 					default: // assume 0-9
202 
203 						v += (chunk.data[i + 1] - '0');
204 						break;
205 					}
206 
207 					v *= 16;
208 
209 					// Second char
210 					switch (chunk.data[i + 2])
211 					{
212 					case 'a': case 'A': v += 10; break;
213 					case 'b': case 'B': v += 11; break;
214 					case 'c': case 'C': v += 12; break;
215 					case 'd': case 'D': v += 13; break;
216 					case 'e': case 'E': v += 14; break;
217 					case 'f': case 'F': v += 15; break;
218 					default: // assume 0-9
219 
220 						v += (chunk.data[i + 2] - '0');
221 						break;
222 					}
223 
224 					value << static_cast <char>(v);
225 
226 					i += 2; // skip next 2 chars
227 				}
228 				else
229 				{
230 					value << c;
231 				}
232 			}
233 		}
234 		// Simply copy data, as it is not encoded
235 		else
236 		{
237 			// This syntax is non-standard (expressly prohibited
238 			// by RFC-2047), but is used by Mozilla:
239 			//
240     		// Content-Type: image/png;
241 			//    name="=?us-ascii?Q?Logo_VMime=2Epng?="
242 
243 			// Using 'vmime::text' to parse the data is safe even
244 			// if the data is not encoded, because it can recover
245 			// from parsing errors.
246 			vmime::text t;
247 			t.parse(ctx, chunk.data);
248 
249 			if (t.getWordCount() != 0)
250 			{
251 				value << t.getWholeBuffer();
252 
253 				if (!foundCharsetChunk)
254 				{
255 					// This is still wrong. Each word can have it's own charset, and can
256 					// be mixed (eg. iso-8859-1 and iso-2022-jp), but very unlikely. Real
257 					// fix is to have parameters store a vmime::text instead of a
258 					// vmime::word in m_value. But that changes the interface.
259 					for (size_t i = 0 ; i < t.getWordCount() ; ++i)
260 					{
261 						if (t.getWordAt(i)->getCharset() != ch && ch == charsets::US_ASCII)
262 						{
263 							ch = t.getWordAt(i)->getCharset();
264 							break;
265 						}
266 					}
267 				}
268 			}
269 		}
270 	}
271 
272 	m_value->setBuffer(value.str());
273 	m_value->setCharset(ch);
274 	m_value->setLanguage(lang);
275 }
276 
277 
generateImpl(const generationContext & ctx,utility::outputStream & os,const size_t curLinePos,size_t * newLinePos) const278 void parameter::generateImpl
279 	(const generationContext& ctx, utility::outputStream& os,
280 	 const size_t curLinePos, size_t* newLinePos) const
281 {
282 	const string& name = m_name;
283 	const string& value = m_value->getBuffer();
284 
285 	// For compatibility with implementations that do not understand RFC-2231,
286 	// we may also generate a normal "7bit/us-ascii" parameter
287 	generationContext::EncodedParameterValueModes
288 		genMode = ctx.getEncodedParameterValueMode();
289 
290 #if VMIME_ALWAYS_GENERATE_7BIT_PARAMETER
291 	genMode = generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047;
292 #endif
293 
294 	// [By Eugene A. Shatokhin]
295 	// Note that if both the normal "7bit/us-ascii" value and the extended
296 	// value are present, the latter can be ignored by mail processing systems.
297 	// This may lead to annoying problems, for example, with strange names of
298 	// attachments with all but 7-bit ascii characters removed, etc. To avoid
299 	// this, I would suggest not to create "7bit/us-ascii" value if the extended
300 	// value is to be generated.
301 
302 	// A stream for a temporary storage
303 	string sevenBitBuffer;
304 	utility::outputStreamStringAdapter sevenBitStream(sevenBitBuffer);
305 
306 	size_t pos = curLinePos;
307 
308 	if (pos + name.length() + 10 + value.length() > ctx.getMaxLineLength())
309 	{
310 		sevenBitStream << NEW_LINE_SEQUENCE;
311 		pos = NEW_LINE_SEQUENCE_LENGTH;
312 	}
313 
314 	bool needQuoting = false;
315 	bool needQuotedPrintable = false;
316 	size_t valueLength = 0;
317 
318 	// Use worst-case length name.length()+2 for 'name=' part of line
319 	for (size_t i = 0 ; (i < value.length()) && (pos + name.length() + 2 + valueLength < ctx.getMaxLineLength() - 4) ; ++i, ++valueLength)
320 	{
321 		switch (value[i])
322 		{
323 		// Characters that need to be quoted _and_ escaped
324 		case '"':
325 		case '\\':
326 		// Other characters that need quoting
327 		case ' ':
328 		case '\t':
329 		case '(':
330 		case ')':
331 		case '<':
332 		case '>':
333 		case '@':
334 		case ',':
335 		case ';':
336 		case ':':
337 		case '/':
338 		case '[':
339 		case ']':
340 		case '?':
341 		case '=':
342 
343 			needQuoting = true;
344 			break;
345 
346 		default:
347 
348 			if (!parserHelpers::isAscii(value[i]))
349 			{
350 				needQuotedPrintable = true;
351 				needQuoting = true;
352 			}
353 
354 			break;
355 		}
356 	}
357 
358 	const bool cutValue = (valueLength != value.length());  // has the value been cut?
359 
360 	if (needQuoting)
361 	{
362 		sevenBitStream << name << "=\"";
363 		pos += name.length() + 2;
364 	}
365 	else
366 	{
367 		sevenBitStream << name << "=";
368 		pos += name.length() + 1;
369 	}
370 
371 	// Check whether there is a recommended encoding for this charset.
372 	// If so, the whole buffer will be encoded. Else, the number of
373 	// 7-bit (ASCII) bytes in the input will be used to determine if
374 	// we need to encode the whole buffer.
375 	encoding recommendedEnc;
376 	const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc);
377 	bool extended = alwaysEncode;
378 
379 	if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) &&
380 	    genMode != generationContext::PARAMETER_VALUE_NO_ENCODING)
381 	{
382 		// Send the name in quoted-printable, so outlook express et.al.
383 		// will understand the real filename
384 		size_t oldLen = sevenBitBuffer.length();
385 		m_value->generate(sevenBitStream);
386 		pos += sevenBitBuffer.length() - oldLen;
387 		extended = true;		// also send with RFC-2231 encoding
388 	}
389 	else
390 	{
391 		// Do not chop off this value, but just add the complete name as one header line.
392 		for (size_t i = 0, n = value.length(), curValueLength = 0 ;
393 		     i < n && curValueLength < valueLength ; ++i)
394 		{
395 			const char_t c = value[i];
396 
397 			if (/* needQuoting && */ (c == '"' || c == '\\'))  // 'needQuoting' is implicit
398 			{
399 				sevenBitStream << '\\' << value[i];  // escape 'x' with '\x'
400 				pos += 2;
401 			}
402 			else if (parserHelpers::isAscii(c))
403 			{
404 				sevenBitStream << value[i];
405 				++pos;
406 				++curValueLength;
407 			}
408 			else
409 			{
410 				extended = true;
411 			}
412 		}
413 
414 	} // !needQuotedPrintable
415 
416 	if (needQuoting)
417 	{
418 		sevenBitStream << '"';
419 		++pos;
420 	}
421 
422 	if (genMode == generationContext::PARAMETER_VALUE_RFC2047_ONLY ||
423 	    genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)
424 	{
425 		os << sevenBitBuffer;
426 	}
427 
428 	// Also generate an extended parameter if the value contains 8-bit characters
429 	// or is too long for a single line
430 	if ((extended || cutValue) &&
431 		genMode != generationContext::PARAMETER_VALUE_NO_ENCODING &&
432 	    genMode != generationContext::PARAMETER_VALUE_RFC2047_ONLY)
433 	{
434 
435 		if (genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)
436 		{
437 			os << ';';
438 			++pos;
439 		}
440 		else
441 		{
442 			// The data output to 'sevenBitBuffer' will be discarded in this case
443 			pos = curLinePos;
444 		}
445 
446 		/* RFC-2231
447 		 * ========
448 		 *
449 		 * Content-Type: message/external-body; access-type=URL;
450 		 *    URL*0="ftp://";
451 		 *    URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
452 		 *
453 		 * Content-Type: application/x-stuff;
454 		 *    title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A
455 		 *
456 		 * Content-Type: application/x-stuff;
457 		 *    title*0*=us-ascii'en'This%20is%20even%20more%20
458 		 *    title*1*=%2A%2A%2Afun%2A%2A%2A%20
459 		 *    title*2="isn't it!"
460 		 */
461 
462 		// Check whether there is enough space for the first section:
463 		// parameter name, section identifier, charset and separators
464 		// + at least 5 characters for the value
465 		const size_t firstSectionLength =
466 			  name.length() + 4 /* *0*= */ + 2 /* '' */
467 			+ m_value->getCharset().getName().length();
468 
469 		if (pos + firstSectionLength + 5 >= ctx.getMaxLineLength())
470 		{
471 			os << NEW_LINE_SEQUENCE;
472 			pos = NEW_LINE_SEQUENCE_LENGTH;
473 		}
474 
475 		// Split text into multiple sections that fit on one line
476 		int sectionCount = 0;
477 		std::vector <string> sectionText;
478 
479 		string currentSection;
480 		size_t currentSectionLength = firstSectionLength;
481 
482 		for (size_t i = 0 ; i < value.length() ; ++i)
483 		{
484 			// Check whether we should start a new line (taking into
485 			// account the next character will be encoded = worst case)
486 			if (currentSectionLength + 3 >= ctx.getMaxLineLength())
487 			{
488 				sectionText.push_back(currentSection);
489 				sectionCount++;
490 
491 				currentSection.clear();
492 				currentSectionLength = NEW_LINE_SEQUENCE_LENGTH
493 					+ name.length() + 6;
494 			}
495 
496 			// Output next character
497 			const char_t c = value[i];
498 			bool encode = false;
499 
500 			switch (c)
501 			{
502 			// special characters
503 			case ' ':
504 			case '\t':
505 			case '\r':
506 			case '\n':
507 			case '%':
508 			case '"':
509 			case ';':
510 			case ',':
511 			case '(':
512 			case ')':
513 			case '<':
514 			case '>':
515 			case '@':
516 			case ':':
517 			case '/':
518 			case '[':
519 			case ']':
520 			case '?':
521 			case '=':
522 
523 				encode = true;
524 				break;
525 
526 			default:
527 
528 				encode = (!parserHelpers::isPrint(c) ||
529 				          !parserHelpers::isAscii(c) ||
530 				          alwaysEncode);
531 
532 				break;
533 			}
534 
535 			if (encode)  // need encoding
536 			{
537 				const int h1 = static_cast <unsigned char>(c) / 16;
538 				const int h2 = static_cast <unsigned char>(c) % 16;
539 
540 				currentSection += '%';
541 				currentSection += "0123456789ABCDEF"[h1];
542 				currentSection += "0123456789ABCDEF"[h2];
543 
544 				pos += 3;
545 				currentSectionLength += 3;
546 			}
547 			else
548 			{
549 				currentSection += value[i];
550 
551 				++pos;
552 				++currentSectionLength;
553 			}
554 		}
555 
556 		if (!currentSection.empty())
557 		{
558 			sectionText.push_back(currentSection);
559 			sectionCount++;
560 		}
561 
562 		// Output sections
563 		for (int sectionNumber = 0 ; sectionNumber < sectionCount ; ++sectionNumber)
564 		{
565 			os << name;
566 
567 			if (sectionCount != 1) // no section specifier when only a single one
568 			{
569 				os << '*';
570 				os << sectionNumber;
571 			}
572 
573 			os << "*=";
574 
575 			if (sectionNumber == 0)
576 			{
577 				os << m_value->getCharset().getName();
578 				os << '\'' << /* No language */ '\'';
579 			}
580 
581 			os << sectionText[sectionNumber];
582 
583 			if (sectionNumber + 1 < sectionCount)
584 			{
585 				os << ';';
586 				os << NEW_LINE_SEQUENCE;
587 				pos = NEW_LINE_SEQUENCE_LENGTH;
588 			}
589 		}
590 	}
591 	else if (!(genMode == generationContext::PARAMETER_VALUE_RFC2047_ONLY ||
592 	           genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047))
593 	{
594 		// The value does not contain 8-bit characters and
595 		// is short enough for a single line.
596 		// "7bit/us-ascii" will suffice in this case.
597 
598 		// Output what has been stored in temporary buffer so far
599 		os << sevenBitBuffer;
600 	}
601 
602 	if (newLinePos)
603 		*newLinePos = pos;
604 }
605 
606 
getGeneratedSize(const generationContext & ctx)607 size_t parameter::getGeneratedSize(const generationContext& ctx)
608 {
609 	const string& name = m_name;
610 	const string& value = m_value->getBuffer();
611 
612 	const size_t bytesNeedingEncoding =
613 		value.length() - utility::stringUtils::countASCIIchars(value.begin(), value.end());
614 
615 	const size_t valueLength = value.length();
616 
617 	// Compute generated length in the very worst case
618 
619 	// Non-encoded parameter + value (worst case: quoting + QP)
620 	size_t len = name.length() + 1 /* = */ + 2 /* "" */ + 7 /* =?...?Q?...?= */
621 		+ m_value->getCharset().getName().length() + valueLength + bytesNeedingEncoding * 2 + 1 /* ; */;
622 
623 	// Encoded parameter + value
624 	const size_t maxEncodedValueLengthOnLine =
625 		  ctx.getMaxLineLength() - 2 /* CRLF */ - NEW_LINE_SEQUENCE_LENGTH
626 		- name.length() - 5 /* *00*= */ - 1 /* ; */;
627 
628 	const size_t encodedValueLength = (valueLength + bytesNeedingEncoding * 2)
629 		+ m_value->getCharset().getName().length() + m_value->getLanguage().length() + 2 /* 2 x ' */;
630 
631 	const size_t numberOfSections = 1 /* worst case: generation starts at the end of a line */
632 		+ std::max(size_t(1), encodedValueLength / maxEncodedValueLengthOnLine);
633 
634 	len += numberOfSections * (name.length() + 5 /* *00*= */ + 1 /* ; */ + 2 /* CRLF */ + NEW_LINE_SEQUENCE_LENGTH) + encodedValueLength;
635 
636 	return len;
637 
638 }
639 
640 
getChildComponents()641 const std::vector <shared_ptr <component> > parameter::getChildComponents()
642 {
643 	std::vector <shared_ptr <component> > list;
644 
645 	list.push_back(m_value);
646 
647 	return list;
648 }
649 
650 
651 } // vmime
652 
653