1 //
2 // VMime library (http://www.vmime.org)
3 // Copyright (C) 2002-2013 Vincent Richard <vincent@vmime.org>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 3 of
8 // the License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License along
16 // with this program; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Linking this library statically or dynamically with other modules is making
20 // a combined work based on this library. Thus, the terms and conditions of
21 // the GNU General Public License cover the whole combination.
22 //
23
24 #include "vmime/parameter.hpp"
25 #include "vmime/parserHelpers.hpp"
26
27 #include "vmime/text.hpp"
28 #include "vmime/encoding.hpp"
29
30 #include "vmime/utility/outputStreamAdapter.hpp"
31 #include "vmime/utility/outputStreamStringAdapter.hpp"
32
33
34 namespace vmime
35 {
36
37
parameter(const string & name)38 parameter::parameter(const string& name)
39 : m_name(name), m_value(make_shared <word>())
40 {
41 }
42
43
parameter(const string & name,const word & value)44 parameter::parameter(const string& name, const word& value)
45 : m_name(name), m_value(make_shared <word>(value))
46 {
47 }
48
49
parameter(const string & name,const string & value)50 parameter::parameter(const string& name, const string& value)
51 : m_name(name), m_value(make_shared <word>(value))
52 {
53 }
54
55
// Copy constructor.
//
// Duplicates the name and makes a deep copy of the value, so that the
// new parameter owns its own word. Previously nothing was copied, which
// left 'm_value' null and made any later access to the value (getValue(),
// copyFrom(), generate()...) dereference a null pointer.
parameter::parameter(const parameter& other)
	: component(),
	  m_name(other.m_name),
	  // Guard against a source whose value pointer is unset
	  m_value(other.m_value
	          ? make_shared <word>(*other.m_value)
	          : make_shared <word>())
{
}
60
61
clone() const62 shared_ptr <component> parameter::clone() const
63 {
64 shared_ptr <parameter> p = make_shared <parameter>(m_name);
65 p->copyFrom(*this);
66
67 return (p);
68 }
69
70
copyFrom(const component & other)71 void parameter::copyFrom(const component& other)
72 {
73 const parameter& param = dynamic_cast <const parameter&>(other);
74
75 m_name = param.m_name;
76 m_value->copyFrom(*param.m_value);
77 }
78
79
// Assignment: delegates to copyFrom() and returns this object.
parameter& parameter::operator=(const parameter& other)
{
	this->copyFrom(other);

	return *this;
}
85
86
getName() const87 const string& parameter::getName() const
88 {
89 return m_name;
90 }
91
92
getValue() const93 const word& parameter::getValue() const
94 {
95 return *m_value;
96 }
97
98
setValue(const component & value)99 void parameter::setValue(const component& value)
100 {
101 std::ostringstream oss;
102 utility::outputStreamAdapter vos(oss);
103
104 value.generate(vos);
105
106 setValue(word(oss.str(), vmime::charsets::US_ASCII));
107 }
108
109
setValue(const word & value)110 void parameter::setValue(const word& value)
111 {
112 *m_value = value;
113 }
114
115
parseImpl(const parsingContext & ctx,const string & buffer,const size_t position,const size_t end,size_t * newPosition)116 void parameter::parseImpl
117 (const parsingContext& ctx, const string& buffer, const size_t position,
118 const size_t end, size_t* newPosition)
119 {
120 m_value->setBuffer(string(buffer.begin() + position, buffer.begin() + end));
121
122 if (ctx.getInternationalizedEmailSupport())
123 m_value->setCharset(charset(charsets::UTF_8));
124 else
125 m_value->setCharset(charset(charsets::US_ASCII));
126
127 if (newPosition)
128 *newPosition = end;
129 }
130
131
parse(const parsingContext & ctx,const std::vector<valueChunk> & chunks)132 void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>& chunks)
133 {
134 bool foundCharsetChunk = false;
135
136 charset ch(charsets::US_ASCII);
137 string lang;
138
139 std::ostringstream value;
140 value.imbue(std::locale::classic());
141
142 for (std::vector <valueChunk>::size_type i = 0 ; i < chunks.size() ; ++i)
143 {
144 const valueChunk& chunk = chunks[i];
145
146 // Decode following data
147 if (chunk.encoded)
148 {
149 const size_t len = chunk.data.length();
150 size_t pos = 0;
151
152 // If this is the first encoded chunk, extract charset
153 // and language information
154 if (!foundCharsetChunk)
155 {
156 // Eg. "us-ascii'en'This%20is%20even%20more%20"
157 size_t q = chunk.data.find_first_of('\'');
158
159 if (q != string::npos)
160 {
161 const string chs = chunk.data.substr(0, q);
162
163 if (!chs.empty())
164 ch = charset(chs);
165
166 ++q;
167 pos = q;
168 }
169
170 q = chunk.data.find_first_of('\'', pos);
171
172 if (q != string::npos)
173 {
174 // Extract language
175 lang = chunk.data.substr(pos, q - pos);
176
177 ++q;
178 pos = q;
179 }
180
181 foundCharsetChunk = true;
182 }
183
184 for (size_t i = pos ; i < len ; ++i)
185 {
186 const char c = chunk.data[i];
187
188 if (c == '%' && i + 2 < len)
189 {
190 unsigned int v = 0;
191
192 // First char
193 switch (chunk.data[i + 1])
194 {
195 case 'a': case 'A': v += 10; break;
196 case 'b': case 'B': v += 11; break;
197 case 'c': case 'C': v += 12; break;
198 case 'd': case 'D': v += 13; break;
199 case 'e': case 'E': v += 14; break;
200 case 'f': case 'F': v += 15; break;
201 default: // assume 0-9
202
203 v += (chunk.data[i + 1] - '0');
204 break;
205 }
206
207 v *= 16;
208
209 // Second char
210 switch (chunk.data[i + 2])
211 {
212 case 'a': case 'A': v += 10; break;
213 case 'b': case 'B': v += 11; break;
214 case 'c': case 'C': v += 12; break;
215 case 'd': case 'D': v += 13; break;
216 case 'e': case 'E': v += 14; break;
217 case 'f': case 'F': v += 15; break;
218 default: // assume 0-9
219
220 v += (chunk.data[i + 2] - '0');
221 break;
222 }
223
224 value << static_cast <char>(v);
225
226 i += 2; // skip next 2 chars
227 }
228 else
229 {
230 value << c;
231 }
232 }
233 }
234 // Simply copy data, as it is not encoded
235 else
236 {
237 // This syntax is non-standard (expressly prohibited
238 // by RFC-2047), but is used by Mozilla:
239 //
240 // Content-Type: image/png;
241 // name="=?us-ascii?Q?Logo_VMime=2Epng?="
242
243 // Using 'vmime::text' to parse the data is safe even
244 // if the data is not encoded, because it can recover
245 // from parsing errors.
246 vmime::text t;
247 t.parse(ctx, chunk.data);
248
249 if (t.getWordCount() != 0)
250 {
251 value << t.getWholeBuffer();
252
253 if (!foundCharsetChunk)
254 {
255 // This is still wrong. Each word can have it's own charset, and can
256 // be mixed (eg. iso-8859-1 and iso-2022-jp), but very unlikely. Real
257 // fix is to have parameters store a vmime::text instead of a
258 // vmime::word in m_value. But that changes the interface.
259 for (size_t i = 0 ; i < t.getWordCount() ; ++i)
260 {
261 if (t.getWordAt(i)->getCharset() != ch && ch == charsets::US_ASCII)
262 {
263 ch = t.getWordAt(i)->getCharset();
264 break;
265 }
266 }
267 }
268 }
269 }
270 }
271
272 m_value->setBuffer(value.str());
273 m_value->setCharset(ch);
274 m_value->setLanguage(lang);
275 }
276
277
generateImpl(const generationContext & ctx,utility::outputStream & os,const size_t curLinePos,size_t * newLinePos) const278 void parameter::generateImpl
279 (const generationContext& ctx, utility::outputStream& os,
280 const size_t curLinePos, size_t* newLinePos) const
281 {
282 const string& name = m_name;
283 const string& value = m_value->getBuffer();
284
285 // For compatibility with implementations that do not understand RFC-2231,
286 // we may also generate a normal "7bit/us-ascii" parameter
287 generationContext::EncodedParameterValueModes
288 genMode = ctx.getEncodedParameterValueMode();
289
290 #if VMIME_ALWAYS_GENERATE_7BIT_PARAMETER
291 genMode = generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047;
292 #endif
293
294 // [By Eugene A. Shatokhin]
295 // Note that if both the normal "7bit/us-ascii" value and the extended
296 // value are present, the latter can be ignored by mail processing systems.
297 // This may lead to annoying problems, for example, with strange names of
298 // attachments with all but 7-bit ascii characters removed, etc. To avoid
299 // this, I would suggest not to create "7bit/us-ascii" value if the extended
300 // value is to be generated.
301
302 // A stream for a temporary storage
303 string sevenBitBuffer;
304 utility::outputStreamStringAdapter sevenBitStream(sevenBitBuffer);
305
306 size_t pos = curLinePos;
307
308 if (pos + name.length() + 10 + value.length() > ctx.getMaxLineLength())
309 {
310 sevenBitStream << NEW_LINE_SEQUENCE;
311 pos = NEW_LINE_SEQUENCE_LENGTH;
312 }
313
314 bool needQuoting = false;
315 bool needQuotedPrintable = false;
316 size_t valueLength = 0;
317
318 // Use worst-case length name.length()+2 for 'name=' part of line
319 for (size_t i = 0 ; (i < value.length()) && (pos + name.length() + 2 + valueLength < ctx.getMaxLineLength() - 4) ; ++i, ++valueLength)
320 {
321 switch (value[i])
322 {
323 // Characters that need to be quoted _and_ escaped
324 case '"':
325 case '\\':
326 // Other characters that need quoting
327 case ' ':
328 case '\t':
329 case '(':
330 case ')':
331 case '<':
332 case '>':
333 case '@':
334 case ',':
335 case ';':
336 case ':':
337 case '/':
338 case '[':
339 case ']':
340 case '?':
341 case '=':
342
343 needQuoting = true;
344 break;
345
346 default:
347
348 if (!parserHelpers::isAscii(value[i]))
349 {
350 needQuotedPrintable = true;
351 needQuoting = true;
352 }
353
354 break;
355 }
356 }
357
358 const bool cutValue = (valueLength != value.length()); // has the value been cut?
359
360 if (needQuoting)
361 {
362 sevenBitStream << name << "=\"";
363 pos += name.length() + 2;
364 }
365 else
366 {
367 sevenBitStream << name << "=";
368 pos += name.length() + 1;
369 }
370
371 // Check whether there is a recommended encoding for this charset.
372 // If so, the whole buffer will be encoded. Else, the number of
373 // 7-bit (ASCII) bytes in the input will be used to determine if
374 // we need to encode the whole buffer.
375 encoding recommendedEnc;
376 const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc);
377 bool extended = alwaysEncode;
378
379 if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) &&
380 genMode != generationContext::PARAMETER_VALUE_NO_ENCODING)
381 {
382 // Send the name in quoted-printable, so outlook express et.al.
383 // will understand the real filename
384 size_t oldLen = sevenBitBuffer.length();
385 m_value->generate(sevenBitStream);
386 pos += sevenBitBuffer.length() - oldLen;
387 extended = true; // also send with RFC-2231 encoding
388 }
389 else
390 {
391 // Do not chop off this value, but just add the complete name as one header line.
392 for (size_t i = 0, n = value.length(), curValueLength = 0 ;
393 i < n && curValueLength < valueLength ; ++i)
394 {
395 const char_t c = value[i];
396
397 if (/* needQuoting && */ (c == '"' || c == '\\')) // 'needQuoting' is implicit
398 {
399 sevenBitStream << '\\' << value[i]; // escape 'x' with '\x'
400 pos += 2;
401 }
402 else if (parserHelpers::isAscii(c))
403 {
404 sevenBitStream << value[i];
405 ++pos;
406 ++curValueLength;
407 }
408 else
409 {
410 extended = true;
411 }
412 }
413
414 } // !needQuotedPrintable
415
416 if (needQuoting)
417 {
418 sevenBitStream << '"';
419 ++pos;
420 }
421
422 if (genMode == generationContext::PARAMETER_VALUE_RFC2047_ONLY ||
423 genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)
424 {
425 os << sevenBitBuffer;
426 }
427
428 // Also generate an extended parameter if the value contains 8-bit characters
429 // or is too long for a single line
430 if ((extended || cutValue) &&
431 genMode != generationContext::PARAMETER_VALUE_NO_ENCODING &&
432 genMode != generationContext::PARAMETER_VALUE_RFC2047_ONLY)
433 {
434
435 if (genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)
436 {
437 os << ';';
438 ++pos;
439 }
440 else
441 {
442 // The data output to 'sevenBitBuffer' will be discarded in this case
443 pos = curLinePos;
444 }
445
446 /* RFC-2231
447 * ========
448 *
449 * Content-Type: message/external-body; access-type=URL;
450 * URL*0="ftp://";
451 * URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
452 *
453 * Content-Type: application/x-stuff;
454 * title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A
455 *
456 * Content-Type: application/x-stuff;
457 * title*0*=us-ascii'en'This%20is%20even%20more%20
458 * title*1*=%2A%2A%2Afun%2A%2A%2A%20
459 * title*2="isn't it!"
460 */
461
462 // Check whether there is enough space for the first section:
463 // parameter name, section identifier, charset and separators
464 // + at least 5 characters for the value
465 const size_t firstSectionLength =
466 name.length() + 4 /* *0*= */ + 2 /* '' */
467 + m_value->getCharset().getName().length();
468
469 if (pos + firstSectionLength + 5 >= ctx.getMaxLineLength())
470 {
471 os << NEW_LINE_SEQUENCE;
472 pos = NEW_LINE_SEQUENCE_LENGTH;
473 }
474
475 // Split text into multiple sections that fit on one line
476 int sectionCount = 0;
477 std::vector <string> sectionText;
478
479 string currentSection;
480 size_t currentSectionLength = firstSectionLength;
481
482 for (size_t i = 0 ; i < value.length() ; ++i)
483 {
484 // Check whether we should start a new line (taking into
485 // account the next character will be encoded = worst case)
486 if (currentSectionLength + 3 >= ctx.getMaxLineLength())
487 {
488 sectionText.push_back(currentSection);
489 sectionCount++;
490
491 currentSection.clear();
492 currentSectionLength = NEW_LINE_SEQUENCE_LENGTH
493 + name.length() + 6;
494 }
495
496 // Output next character
497 const char_t c = value[i];
498 bool encode = false;
499
500 switch (c)
501 {
502 // special characters
503 case ' ':
504 case '\t':
505 case '\r':
506 case '\n':
507 case '%':
508 case '"':
509 case ';':
510 case ',':
511 case '(':
512 case ')':
513 case '<':
514 case '>':
515 case '@':
516 case ':':
517 case '/':
518 case '[':
519 case ']':
520 case '?':
521 case '=':
522
523 encode = true;
524 break;
525
526 default:
527
528 encode = (!parserHelpers::isPrint(c) ||
529 !parserHelpers::isAscii(c) ||
530 alwaysEncode);
531
532 break;
533 }
534
535 if (encode) // need encoding
536 {
537 const int h1 = static_cast <unsigned char>(c) / 16;
538 const int h2 = static_cast <unsigned char>(c) % 16;
539
540 currentSection += '%';
541 currentSection += "0123456789ABCDEF"[h1];
542 currentSection += "0123456789ABCDEF"[h2];
543
544 pos += 3;
545 currentSectionLength += 3;
546 }
547 else
548 {
549 currentSection += value[i];
550
551 ++pos;
552 ++currentSectionLength;
553 }
554 }
555
556 if (!currentSection.empty())
557 {
558 sectionText.push_back(currentSection);
559 sectionCount++;
560 }
561
562 // Output sections
563 for (int sectionNumber = 0 ; sectionNumber < sectionCount ; ++sectionNumber)
564 {
565 os << name;
566
567 if (sectionCount != 1) // no section specifier when only a single one
568 {
569 os << '*';
570 os << sectionNumber;
571 }
572
573 os << "*=";
574
575 if (sectionNumber == 0)
576 {
577 os << m_value->getCharset().getName();
578 os << '\'' << /* No language */ '\'';
579 }
580
581 os << sectionText[sectionNumber];
582
583 if (sectionNumber + 1 < sectionCount)
584 {
585 os << ';';
586 os << NEW_LINE_SEQUENCE;
587 pos = NEW_LINE_SEQUENCE_LENGTH;
588 }
589 }
590 }
591 else if (!(genMode == generationContext::PARAMETER_VALUE_RFC2047_ONLY ||
592 genMode == generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047))
593 {
594 // The value does not contain 8-bit characters and
595 // is short enough for a single line.
596 // "7bit/us-ascii" will suffice in this case.
597
598 // Output what has been stored in temporary buffer so far
599 os << sevenBitBuffer;
600 }
601
602 if (newLinePos)
603 *newLinePos = pos;
604 }
605
606
getGeneratedSize(const generationContext & ctx)607 size_t parameter::getGeneratedSize(const generationContext& ctx)
608 {
609 const string& name = m_name;
610 const string& value = m_value->getBuffer();
611
612 const size_t bytesNeedingEncoding =
613 value.length() - utility::stringUtils::countASCIIchars(value.begin(), value.end());
614
615 const size_t valueLength = value.length();
616
617 // Compute generated length in the very worst case
618
619 // Non-encoded parameter + value (worst case: quoting + QP)
620 size_t len = name.length() + 1 /* = */ + 2 /* "" */ + 7 /* =?...?Q?...?= */
621 + m_value->getCharset().getName().length() + valueLength + bytesNeedingEncoding * 2 + 1 /* ; */;
622
623 // Encoded parameter + value
624 const size_t maxEncodedValueLengthOnLine =
625 ctx.getMaxLineLength() - 2 /* CRLF */ - NEW_LINE_SEQUENCE_LENGTH
626 - name.length() - 5 /* *00*= */ - 1 /* ; */;
627
628 const size_t encodedValueLength = (valueLength + bytesNeedingEncoding * 2)
629 + m_value->getCharset().getName().length() + m_value->getLanguage().length() + 2 /* 2 x ' */;
630
631 const size_t numberOfSections = 1 /* worst case: generation starts at the end of a line */
632 + std::max(size_t(1), encodedValueLength / maxEncodedValueLengthOnLine);
633
634 len += numberOfSections * (name.length() + 5 /* *00*= */ + 1 /* ; */ + 2 /* CRLF */ + NEW_LINE_SEQUENCE_LENGTH) + encodedValueLength;
635
636 return len;
637
638 }
639
640
getChildComponents()641 const std::vector <shared_ptr <component> > parameter::getChildComponents()
642 {
643 std::vector <shared_ptr <component> > list;
644
645 list.push_back(m_value);
646
647 return list;
648 }
649
650
651 } // vmime
652
653