1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <algorithm>
21 #include <limits>
22 #include <forward_list>
23 #include <memory>
24 
25 #include <sal/log.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/tencinfo.h>
30 #include <tools/inetmime.hxx>
31 #include <rtl/character.hxx>
32 
33 namespace {
34 
// Forward declaration so that code further up in this file can call the
// function before its definition.  It receives a charset name as the
// character range [pBegin, pEnd) and returns an rtl_TextEncoding.
rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
                                           const sal_Char * pEnd);
37 
38 /** Check for US-ASCII white space character.
39 
40     @param nChar  Some UCS-4 character.
41 
42     @return  True if nChar is a US-ASCII white space character (US-ASCII
43     0x09 or 0x20).
44  */
isWhiteSpace(sal_uInt32 nChar)45 bool isWhiteSpace(sal_uInt32 nChar)
46 {
47     return nChar == '\t' || nChar == ' ';
48 }
49 
50 /** Get the Base 64 digit weight of a US-ASCII character.
51 
52     @param nChar  Some UCS-4 character.
53 
54     @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
55     'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
56     corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
57     character (US-ASCII '='), return -1; otherwise, return -2.
58  */
getBase64Weight(sal_uInt32 nChar)59 int getBase64Weight(sal_uInt32 nChar)
60 {
61     return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62            rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63            rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64            nChar == '+' ? 62 :
65            nChar == '/' ? 63 :
66            nChar == '=' ? -1 : -2;
67 }
68 
startsWithLineFolding(const sal_Unicode * pBegin,const sal_Unicode * pEnd)69 bool startsWithLineFolding(const sal_Unicode * pBegin,
70                                             const sal_Unicode * pEnd)
71 {
72     DBG_ASSERT(pBegin && pBegin <= pEnd,
73                "startsWithLineFolding(): Bad sequence");
74 
75     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76            && isWhiteSpace(pBegin[2]); // CR, LF
77 }
78 
translateFromMIME(rtl_TextEncoding eEncoding)79 rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80                                                         eEncoding)
81 {
82 #if defined(_WIN32)
83     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84                RTL_TEXTENCODING_MS_1252 : eEncoding;
85 #else
86     return eEncoding;
87 #endif
88 }
89 
isMIMECharsetEncoding(rtl_TextEncoding eEncoding)90 bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
91 {
92     return rtl_isOctetTextEncoding(eEncoding);
93 }
94 
convertToUnicode(const sal_Char * pBegin,const sal_Char * pEnd,rtl_TextEncoding eEncoding,sal_Size & rSize)95 std::unique_ptr<sal_Unicode[]> convertToUnicode(const sal_Char * pBegin,
96                                          const sal_Char * pEnd,
97                                          rtl_TextEncoding eEncoding,
98                                          sal_Size & rSize)
99 {
100     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101         return nullptr;
102     rtl_TextToUnicodeConverter hConverter
103         = rtl_createTextToUnicodeConverter(eEncoding);
104     rtl_TextToUnicodeContext hContext
105         = rtl_createTextToUnicodeContext(hConverter);
106     std::unique_ptr<sal_Unicode[]> pBuffer;
107     sal_uInt32 nInfo;
108     for (sal_Size nBufferSize = pEnd - pBegin;;
109          nBufferSize += nBufferSize / 3 + 1)
110     {
111         pBuffer.reset(new sal_Unicode[nBufferSize]);
112         sal_Size nSrcCvtBytes;
113         rSize = rtl_convertTextToUnicode(
114                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115                     nBufferSize,
116                     RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117                         | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118                         | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119                     &nInfo, &nSrcCvtBytes);
120         if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121             break;
122         pBuffer.reset();
123         rtl_resetTextToUnicodeContext(hConverter, hContext);
124     }
125     rtl_destroyTextToUnicodeContext(hConverter, hContext);
126     rtl_destroyTextToUnicodeConverter(hConverter);
127     if (nInfo != 0)
128     {
129         pBuffer.reset();
130     }
131     return pBuffer;
132 }
133 
convertFromUnicode(const sal_Unicode * pBegin,const sal_Unicode * pEnd,rtl_TextEncoding eEncoding,sal_Size & rSize)134 std::unique_ptr<sal_Char[]> convertFromUnicode(const sal_Unicode * pBegin,
135                                         const sal_Unicode * pEnd,
136                                         rtl_TextEncoding eEncoding,
137                                         sal_Size & rSize)
138 {
139     if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
140         return nullptr;
141     rtl_UnicodeToTextConverter hConverter
142         = rtl_createUnicodeToTextConverter(eEncoding);
143     rtl_UnicodeToTextContext hContext
144         = rtl_createUnicodeToTextContext(hConverter);
145     std::unique_ptr<sal_Char[]> pBuffer;
146     sal_uInt32 nInfo;
147     for (sal_Size nBufferSize = pEnd - pBegin;;
148          nBufferSize += nBufferSize / 3 + 1)
149     {
150         pBuffer.reset(new sal_Char[nBufferSize]);
151         sal_Size nSrcCvtBytes;
152         rSize = rtl_convertUnicodeToText(
153                     hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
154                     nBufferSize,
155                     RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
156                         | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
157                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
158                         | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
159                     &nInfo, &nSrcCvtBytes);
160         if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
161             break;
162         pBuffer.reset();
163         rtl_resetUnicodeToTextContext(hConverter, hContext);
164     }
165     rtl_destroyUnicodeToTextContext(hConverter, hContext);
166     rtl_destroyUnicodeToTextConverter(hConverter);
167     if (nInfo != 0)
168     {
169         pBuffer.reset();
170     }
171     return pBuffer;
172 }
173 
174 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
175 
176     @param pBuffer  Points to a buffer, must not be null.
177 
178     @param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
179 
180     @return  A pointer past the UTF-16 characters put into the buffer
181     (i.e., pBuffer + 1 or pBuffer + 2).
182  */
putUTF32Character(sal_Unicode * pBuffer,sal_uInt32 nUTF32)183 sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
184                                                  sal_uInt32 nUTF32)
185 {
186     DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
187     if (nUTF32 < 0x10000)
188         *pBuffer++ = sal_Unicode(nUTF32);
189     else
190     {
191         nUTF32 -= 0x10000;
192         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
193         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
194     }
195     return pBuffer;
196 }
197 
writeUTF8(OStringBuffer & rSink,sal_uInt32 nChar)198 void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
199 {
200     // See RFC 2279 for a discussion of UTF-8.
201     DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
202 
203     if (nChar < 0x80)
204         rSink.append(sal_Char(nChar));
205     else if (nChar < 0x800)
206         rSink.append(sal_Char(nChar >> 6 | 0xC0))
207              .append(sal_Char((nChar & 0x3F) | 0x80));
208     else if (nChar < 0x10000)
209         rSink.append(sal_Char(nChar >> 12 | 0xE0))
210              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
211              .append(sal_Char((nChar & 0x3F) | 0x80));
212     else if (nChar < 0x200000)
213         rSink.append(sal_Char(nChar >> 18 | 0xF0))
214              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
215              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
216              .append(sal_Char((nChar & 0x3F) | 0x80));
217     else if (nChar < 0x4000000)
218         rSink.append(sal_Char(nChar >> 24 | 0xF8))
219              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
220              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
221              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
222              .append(sal_Char((nChar & 0x3F) | 0x80));
223     else
224         rSink.append(sal_Char(nChar >> 30 | 0xFC))
225              .append(sal_Char((nChar >> 24 & 0x3F) | 0x80))
226              .append(sal_Char((nChar >> 18 & 0x3F) | 0x80))
227              .append(sal_Char((nChar >> 12 & 0x3F) | 0x80))
228              .append(sal_Char((nChar >> 6 & 0x3F) | 0x80))
229              .append(sal_Char((nChar & 0x3F) | 0x80));
230 }
231 
translateUTF8Char(const sal_Char * & rBegin,const sal_Char * pEnd,rtl_TextEncoding eEncoding,sal_uInt32 & rCharacter)232 bool translateUTF8Char(const sal_Char *& rBegin,
233                                  const sal_Char * pEnd,
234                                  rtl_TextEncoding eEncoding,
235                                  sal_uInt32 & rCharacter)
236 {
237     if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
238         || static_cast< unsigned char >(*rBegin) >= 0xFE)
239         return false;
240 
241     int nCount;
242     sal_uInt32 nMin;
243     sal_uInt32 nUCS4;
244     const sal_Char * p = rBegin;
245     if (static_cast< unsigned char >(*p) < 0xE0)
246     {
247         nCount = 1;
248         nMin = 0x80;
249         nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
250     }
251     else if (static_cast< unsigned char >(*p) < 0xF0)
252     {
253         nCount = 2;
254         nMin = 0x800;
255         nUCS4 = static_cast< unsigned char >(*p) & 0xF;
256     }
257     else if (static_cast< unsigned char >(*p) < 0xF8)
258     {
259         nCount = 3;
260         nMin = 0x10000;
261         nUCS4 = static_cast< unsigned char >(*p) & 7;
262     }
263     else if (static_cast< unsigned char >(*p) < 0xFC)
264     {
265         nCount = 4;
266         nMin = 0x200000;
267         nUCS4 = static_cast< unsigned char >(*p) & 3;
268     }
269     else
270     {
271         nCount = 5;
272         nMin = 0x4000000;
273         nUCS4 = static_cast< unsigned char >(*p) & 1;
274     }
275     ++p;
276 
277     for (; nCount-- > 0; ++p)
278         if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
279             nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
280         else
281             return false;
282 
283     if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
284         return false;
285 
286     if (eEncoding >= RTL_TEXTENCODING_UCS4)
287         rCharacter = nUCS4;
288     else
289     {
290         sal_Unicode aUTF16[2];
291         const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
292         sal_Size nSize;
293         std::unique_ptr<sal_Char[]> pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
294                                                 nSize);
295         if (!pBuffer)
296             return false;
297         DBG_ASSERT(nSize == 1,
298                    "translateUTF8Char(): Bad conversion");
299         rCharacter = pBuffer[0];
300     }
301     rBegin = p;
302     return true;
303 }
304 
// Forward declaration: appends the bytes in [pBegin, pEnd), interpreted as
// ISO 8859-1, to rText.
void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
                    sal_Char const * pEnd);
307 
308 struct Parameter
309 {
310     OString const m_aAttribute;
311     OString const m_aCharset;
312     OString const m_aLanguage;
313     OString const m_aValue;
314     sal_uInt32 const m_nSection;
315     bool const m_bExtended;
316 
operator <__anonf20f9e5d0111::Parameter317     bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
318     {
319         int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
320         return nComp < 0 ||
321                 (nComp == 0 && m_nSection < rhs.m_nSection);
322     }
323     struct IsSameSection // is used to check container for duplicates with std::any_of
324     {
325         const OString& rAttribute;
326         const sal_uInt32 nSection;
operator ()__anonf20f9e5d0111::Parameter::IsSameSection327         bool operator()(const Parameter& r) const
328         { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
329     };
330 };
331 
// Container for the parameter sections collected while scanning; a singly
// linked list, so elements can only be walked front to back.
typedef std::forward_list<Parameter> ParameterList;

// Forward declaration: checks the sections in rInput and, if pOutput is not
// null, fills *pOutput from them.
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput);
336 
337 //  appendISO88591
338 
appendISO88591(OUStringBuffer & rText,sal_Char const * pBegin,sal_Char const * pEnd)339 void appendISO88591(OUStringBuffer & rText, sal_Char const * pBegin,
340                     sal_Char const * pEnd)
341 {
342     sal_Int32 nLength = pEnd - pBegin;
343     std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
344     for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
345         *p++ = static_cast<unsigned char>(*pBegin++);
346     rText.append(pBuffer.get(), nLength);
347 }
348 
//  parseParameters

/** Validate a list of scanned parameter sections and, optionally, turn it
    into a list of complete parameters.

    The checks below compare each section with its predecessor in the list,
    so rInput has to be ordered by attribute and then by section number (the
    order Parameter::operator< defines).

    @param rInput  The scanned sections.

    @param pOutput  If not null, it is cleared first and then receives one
    entry per attribute, whose value is the concatenation of the decoded
    values of all sections of that attribute.

    @return  False if some section with a number greater than zero is not
    directly preceded by the section with the same attribute and the next
    lower number; true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput)
{
    if (pOutput)
        pOutput->clear();

    // Pass 1: every continuation section (number > 0) must directly follow
    // section number - 1 of the same attribute.
    for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
    {
        if (it->m_nSection > 0
            && (itPrev == rInput.end()
                || itPrev->m_nSection != it->m_nSection - 1
                || itPrev->m_aAttribute != it->m_aAttribute))
            return false;
    }

    // Pass 2: `it` points at section 0 of a parameter, `itNext` walks over
    // that parameter's sections and ends up at section 0 of the next one.
    if (pOutput)
        for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
        {
            // The charset (if any) is taken from the first section only.
            bool bCharset = !it->m_aCharset.isEmpty();
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bCharset)
                eEncoding
                    = getCharsetEncoding(it->m_aCharset.getStr(),
                                                   it->m_aCharset.getStr()
                                                       + it->m_aCharset.getLength());
            OUStringBuffer aValue(64);
            bool bBadEncoding = false;
            itNext = it;
            do
            {
                // Decode with the declared charset when the first section
                // is extended and names one; otherwise try UTF-8 first ...
                sal_Size nSize;
                std::unique_ptr<sal_Unicode[]> pUnicode
                    = convertToUnicode(itNext->m_aValue.getStr(),
                                                 itNext->m_aValue.getStr()
                                                     + itNext->m_aValue.getLength(),
                                                 bCharset && it->m_bExtended ?
                                                     eEncoding :
                                                     RTL_TEXTENCODING_UTF8,
                                                 nSize);
                // ... and fall back to ISO 8859-1 if that UTF-8 attempt
                // failed.
                if (!pUnicode && !(bCharset && it->m_bExtended))
                    pUnicode = convertToUnicode(
                                   itNext->m_aValue.getStr(),
                                   itNext->m_aValue.getStr()
                                       + itNext->m_aValue.getLength(),
                                   RTL_TEXTENCODING_ISO_8859_1, nSize);
                if (!pUnicode)
                {
                    bBadEncoding = true;
                    break;
                }
                aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
                ++itNext;
            }
            while (itNext != rInput.end() && itNext->m_nSection != 0);

            if (bBadEncoding)
            {
                // Decoding failed somewhere: discard what was collected and
                // rebuild the value from the raw bytes of all sections.
                aValue.setLength(0);
                itNext = it;
                do
                {
                    if (itNext->m_bExtended)
                    {
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
                            aValue.append(
                                static_cast<sal_Unicode>(
                                    static_cast<unsigned char>(itNext->m_aValue[i])
                                    | 0xF800)); // map to unicode corporate use sub area
                    }
                    else
                    {
                        // NOTE(review): bytes are appended as plain char
                        // here; confirm that values >= 0x80 cannot reach
                        // this branch.
                        for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
                            aValue.append( static_cast<char>(itNext->m_aValue[i]) );
                    }
                    ++itNext;
                }
                while (itNext != rInput.end() && itNext->m_nSection != 0);
            }
            // Store the joined value; the last member of the inserted entry
            // is true only if decoding succeeded.  insert() reports via
            // ret.second whether the entry was actually added.
            auto const ret = pOutput->insert(
                {it->m_aAttribute,
                 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
            SAL_INFO_IF(!ret.second, "tools",
                "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
        }
    return true;
}
437 
438 /** Check whether some character is valid within an RFC 2045 <token>.
439 
440     @param nChar  Some UCS-4 character.
441 
442     @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
443     'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
444     '-', '.', '^', '_', '`', '{', '|', '}', or '~').
445  */
isTokenChar(sal_uInt32 nChar)446 bool isTokenChar(sal_uInt32 nChar)
447 {
448     static const bool aMap[128]
449         = { false, false, false, false, false, false, false, false,
450             false, false, false, false, false, false, false, false,
451             false, false, false, false, false, false, false, false,
452             false, false, false, false, false, false, false, false,
453             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
454             false, false,  true,  true, false,  true,  true, false, //()*+,-./
455              true,  true,  true,  true,  true,  true,  true,  true, //01234567
456              true,  true, false, false, false, false, false, false, //89:;<=>?
457             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
458              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
459              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
460              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
461              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
462              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
463              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
464              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
465           };
466     return rtl::isAscii(nChar) && aMap[nChar];
467 }
468 
/** Skip a parenthesized comment at the start of a character range.

    Comments may nest, and a backslash quotes the character that follows it.

    @param pBegin  Start of the range, must not be null.

    @param pEnd  End of the range (one past the last character).

    @return  A pointer past the closing ')' that matches the opening '(' at
    pBegin; pBegin itself if the range does not start with '(' or the
    comment is not closed within the range.
 */
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Unicode * pCur = pBegin;
    while (pCur != pEnd)
    {
        const sal_Unicode c = *pCur++;
        if (c == '(')
            ++nDepth;
        else if (c == ')')
        {
            // The outermost comment has been closed.
            if (--nDepth == 0)
                return pCur;
        }
        else if (c == '\\' && pCur != pEnd)
            ++pCur; // skip the quoted character
    }
    return pBegin;
}
498 
/** Skip any mix of white space, folded line breaks and comments at the
    start of a character range.

    @param pBegin  Start of the range, must not be null.

    @param pEnd  End of the range (one past the last character).

    @return  A pointer to the first character that is neither a tab or space,
    nor the start of a CR LF white-space folding, nor the start of a
    complete comment; pEnd if there is no such character.
 */
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * pBegin,
                                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipLinearWhiteSpaceComment(): Bad sequence");

    for (;;)
    {
        if (pBegin == pEnd)
            return pBegin;

        const sal_Unicode c = *pBegin;
        if (c == '\t' || c == ' ')
            ++pBegin;
        else if (c == 0x0D) // CR
        {
            // Only a CR that starts a folded line break is skipped.
            if (!startsWithLineFolding(pBegin, pEnd))
                return pBegin;
            pBegin += 3;
        }
        else if (c == '(')
        {
            // An unterminated comment is left alone.
            const sal_Unicode * pAfter = skipComment(pBegin, pEnd);
            if (pAfter == pBegin)
                return pBegin;
            pBegin = pAfter;
        }
        else
            return pBegin;
    }
}
536 
/** Skip a double-quoted string at the start of a character range.

    Inside the string a backslash quotes the character that follows it, and
    a CR is only accepted as part of a CR LF white-space folding.

    @param pBegin  Start of the range, must not be null.

    @param pEnd  End of the range (one past the last character).

    @return  A pointer past the closing '"'; pBegin itself if the range does
    not start with '"', the string is not closed within the range, or it
    contains a CR that does not start a folding.
 */
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
                                     const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Unicode * pCur = pBegin + 1;
    while (pCur != pEnd)
    {
        const sal_Unicode c = *pCur++;
        if (c == '"')
            return pCur;

        if (c == '\\')
        {
            if (pCur != pEnd)
                ++pCur; // skip the quoted character
        }
        else if (c == 0x0D) // CR
        {
            // Must be followed by LF and one white space character.
            if (pEnd - pCur < 2 || pCur[0] != 0x0A // LF
                || !isWhiteSpace(pCur[1]))
                return pBegin;
            pCur += 2;
        }
    }
    return pBegin;
}
563 
/** Scan a sequence of parameters of the form
    ";" attribute ["*" section] ["*"] "=" value
    from the start of a character range.

    Scanning stops, without failing, at the first parameter that cannot be
    scanned completely; the parameters scanned so far are kept.

    @param pBegin  Start of the range.

    @param pEnd  End of the range (one past the last character).

    @param pParameters  If not null, receives the parameters found (filled in
    by parseParameters()).  If null, values are merely skipped instead of
    being collected.

    @return  A pointer past the last parameter that was scanned completely
    (pointing at any white space or comment that follows it), or pBegin if
    parseParameters() rejects the collected sections.
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                             sal_Unicode const * pEnd,
                                             INetContentTypeParameterList *
                                                 pParameters)
{
    ParameterList aList;
    // pParameterBegin always marks the start of the parameter currently
    // being scanned; it is what gets returned when the loop is left.
    sal_Unicode const * pParameterBegin = pBegin;
    for (sal_Unicode const * p = pParameterBegin;;)
    {
        // Every parameter is introduced by ';'.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
            break;
        p = pParameterBegin + 1;

        // Attribute name: token characters up to (not including) a '*'.
        sal_Unicode const * pAttributeBegin
            = skipLinearWhiteSpaceComment(p, pEnd);
        p = pAttributeBegin;
        bool bDowncaseAttribute = false;
        while (p != pEnd && isTokenChar(*p) && *p != '*')
        {
            bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
            ++p;
        }
        if (p == pAttributeBegin)
            break;
        OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
        if (bDowncaseAttribute)
            aAttribute = aAttribute.toAsciiLowerCase();

        // Optional "*<number>": the section number of a split parameter.
        sal_uInt32 nSection = 0;
        if (p != pEnd && *p == '*')
        {
            ++p;
            if (p != pEnd && rtl::isAsciiDigit(*p)
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
                break;
        }

        // The same attribute/section combination must not occur twice.
        bool bPresent = std::any_of(aList.begin(), aList.end(),
                                    Parameter::IsSameSection{aAttribute, nSection});
        if (bPresent)
            break;

        // Optional trailing '*': the value uses the extended syntax.
        bool bExtended = false;
        if (p != pEnd && *p == '*')
        {
            ++p;
            bExtended = true;
        }

        p = skipLinearWhiteSpaceComment(p, pEnd);

        if (p == pEnd || *p != '=')
            break;

        p = skipLinearWhiteSpaceComment(p + 1, pEnd);

        OString aCharset;
        OString aLanguage;
        OString aValue;
        if (bExtended)
        {
            // Only the first section carries the charset'language' prefix.
            if (nSection == 0)
            {
                sal_Unicode const * pCharsetBegin = p;
                bool bDowncaseCharset = false;
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
                {
                    bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
                    ++p;
                }
                if (p == pCharsetBegin)
                    break;
                if (pParameters)
                {
                    aCharset = OString(
                        pCharsetBegin,
                        p - pCharsetBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseCharset)
                        aCharset = aCharset.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;

                // Language tag: '-'-separated groups of one to eight
                // US-ASCII letters.
                sal_Unicode const * pLanguageBegin = p;
                bool bDowncaseLanguage = false;
                int nLetters = 0;
                for (; p != pEnd; ++p)
                    if (rtl::isAsciiAlpha(*p))
                    {
                        if (++nLetters > 8)
                            break;
                        bDowncaseLanguage = bDowncaseLanguage
                                            || rtl::isAsciiUpperCase(*p);
                    }
                    else if (*p == '-')
                    {
                        if (nLetters == 0)
                            break;
                        nLetters = 0;
                    }
                    else
                        break;
                // Reject an empty or over-long final group.
                if (nLetters == 0 || nLetters > 8)
                    break;
                if (pParameters)
                {
                    aLanguage = OString(
                        pLanguageBegin,
                        p - pLanguageBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseLanguage)
                        aLanguage = aLanguage.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;
            }
            if (pParameters)
            {
                // Collect the extended value as bytes: "%XX" escapes become
                // the byte they denote, everything else is stored as UTF-8.
                OStringBuffer aSink;
                while (p != pEnd)
                {
                    // Read ahead through q so that p only advances when the
                    // character is accepted as part of the value.
                    auto q = p;
                    sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
                        break;
                    p = q;
                    if (nChar == '%' && p + 1 < pEnd)
                    {
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
                        if (nWeight1 >= 0 && nWeight2 >= 0)
                        {
                            aSink.append(sal_Char(nWeight1 << 4 | nWeight2));
                            p += 2;
                            continue;
                        }
                    }
                    writeUTF8(aSink, nChar);
                }
                aValue = aSink.makeStringAndClear();
            }
            else
                // Nothing to collect: just skip the value characters.
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                    ++p;
        }
        else if (p != pEnd && *p == '"')
            // Quoted-string value.
            if (pParameters)
            {
                OStringBuffer aSink(256);
                bool bInvalid = false;
                for (++p;;)
                {
                    // Running out of input before the closing quote is an
                    // error.
                    if (p == pEnd)
                    {
                        bInvalid = true;
                        break;
                    }
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (nChar == '"')
                        break;
                    else if (nChar == 0x0D) // CR
                    {
                        // A CR must start a folding; of CR LF WSP only the
                        // white space character is kept.
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
                            || !isWhiteSpace(*p))
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = static_cast<unsigned char>(*p++);
                    }
                    else if (nChar == '\\')
                    {
                        // A backslash quotes the next character, which must
                        // exist.
                        if (p == pEnd)
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = INetMIME::getUTF32Character(p, pEnd);
                    }
                    writeUTF8(aSink, nChar);
                }
                if (bInvalid)
                    break;
                aValue = aSink.makeStringAndClear();
            }
            else
            {
                // Nothing to collect: just skip the quoted string.
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
                if (p == pStringEnd)
                    break;
                p = pStringEnd;
            }
        else
        {
            // Plain token value (non-US-ASCII characters are accepted, too);
            // it must not be empty.
            sal_Unicode const * pTokenBegin = p;
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                ++p;
            if (p == pTokenBegin)
                break;
            if (pParameters)
                aValue = OString(
                    pTokenBegin, p - pTokenBegin,
                    RTL_TEXTENCODING_UTF8);
        }
        aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
    }
    // Bring the sections into attribute/section order, as needed by
    // parseParameters().
    aList.sort();
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
}
779 
equalIgnoreCase(const sal_Char * pBegin1,const sal_Char * pEnd1,const sal_Char * pString2)780 bool equalIgnoreCase(const sal_Char * pBegin1,
781                                const sal_Char * pEnd1,
782                                const sal_Char * pString2)
783 {
784     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
785                "equalIgnoreCase(): Bad sequences");
786 
787     while (*pString2 != 0)
788         if (pBegin1 == pEnd1
789             || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
790                 != rtl::toAsciiUpperCase(
791                     static_cast<unsigned char>(*pString2++))))
792             return false;
793     return pBegin1 == pEnd1;
794 }
795 
796 struct EncodingEntry
797 {
798     sal_Char const * m_aName;
799     rtl_TextEncoding const m_eEncoding;
800 };
801 
802 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
803 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless  otherwise
804 // noted:
805 static EncodingEntry const aEncodingMap[]
806     = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
807         { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
808         { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
809         { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
810         { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
811         { "ASCII", RTL_TEXTENCODING_ASCII_US },
812         { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
813         { "US", RTL_TEXTENCODING_ASCII_US },
814         { "IBM367", RTL_TEXTENCODING_ASCII_US },
815         { "CP367", RTL_TEXTENCODING_ASCII_US },
816         { "CSASCII", RTL_TEXTENCODING_ASCII_US },
817         { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
818         { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
819         { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
820         { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
821         { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
822         { "L1", RTL_TEXTENCODING_ISO_8859_1 },
823         { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
824         { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
825         { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
826         { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
827         { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
828         { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
829         { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
830         { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
831         { "L2", RTL_TEXTENCODING_ISO_8859_2 },
832         { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
833         { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
834         { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
835         { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
836         { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
837         { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
838         { "L3", RTL_TEXTENCODING_ISO_8859_3 },
839         { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
840         { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
841         { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
842         { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
843         { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
844         { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
845         { "L4", RTL_TEXTENCODING_ISO_8859_4 },
846         { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
847         { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
848         { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
849         { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
850         { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
851         { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
852         { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
853         { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
854         { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
855         { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
856         { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
857         { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
858         { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
859         { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
860         { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
861         { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
862         { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
863         { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
864         { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
865         { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
866         { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
867         { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
868         { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
869         { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
870         { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
871         { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
872         { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
873         { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
874         { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
875         { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
876         { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
877         { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
878         { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
879         { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
880         { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
881         { "L5", RTL_TEXTENCODING_ISO_8859_9 },
882         { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
883         { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
884         { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
885         { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
886         { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
887         { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
888         { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
889         { "IBM437", RTL_TEXTENCODING_IBM_437 },
890         { "CP437", RTL_TEXTENCODING_IBM_437 },
891         { "437", RTL_TEXTENCODING_IBM_437 },
892         { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
893         { "IBM850", RTL_TEXTENCODING_IBM_850 },
894         { "CP850", RTL_TEXTENCODING_IBM_850 },
895         { "850", RTL_TEXTENCODING_IBM_850 },
896         { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
897         { "IBM860", RTL_TEXTENCODING_IBM_860 },
898         { "CP860", RTL_TEXTENCODING_IBM_860 },
899         { "860", RTL_TEXTENCODING_IBM_860 },
900         { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
901         { "IBM861", RTL_TEXTENCODING_IBM_861 },
902         { "CP861", RTL_TEXTENCODING_IBM_861 },
903         { "861", RTL_TEXTENCODING_IBM_861 },
904         { "CP-IS", RTL_TEXTENCODING_IBM_861 },
905         { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
906         { "IBM863", RTL_TEXTENCODING_IBM_863 },
907         { "CP863", RTL_TEXTENCODING_IBM_863 },
908         { "863", RTL_TEXTENCODING_IBM_863 },
909         { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
910         { "IBM865", RTL_TEXTENCODING_IBM_865 },
911         { "CP865", RTL_TEXTENCODING_IBM_865 },
912         { "865", RTL_TEXTENCODING_IBM_865 },
913         { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
914         { "IBM775", RTL_TEXTENCODING_IBM_775 },
915         { "CP775", RTL_TEXTENCODING_IBM_775 },
916         { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
917         { "IBM852", RTL_TEXTENCODING_IBM_852 },
918         { "CP852", RTL_TEXTENCODING_IBM_852 },
919         { "852", RTL_TEXTENCODING_IBM_852 },
920         { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
921         { "IBM855", RTL_TEXTENCODING_IBM_855 },
922         { "CP855", RTL_TEXTENCODING_IBM_855 },
923         { "855", RTL_TEXTENCODING_IBM_855 },
924         { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
925         { "IBM857", RTL_TEXTENCODING_IBM_857 },
926         { "CP857", RTL_TEXTENCODING_IBM_857 },
927         { "857", RTL_TEXTENCODING_IBM_857 },
928         { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
929         { "IBM862", RTL_TEXTENCODING_IBM_862 },
930         { "CP862", RTL_TEXTENCODING_IBM_862 },
931         { "862", RTL_TEXTENCODING_IBM_862 },
932         { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
933         { "IBM864", RTL_TEXTENCODING_IBM_864 },
934         { "CP864", RTL_TEXTENCODING_IBM_864 },
935         { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
936         { "IBM866", RTL_TEXTENCODING_IBM_866 },
937         { "CP866", RTL_TEXTENCODING_IBM_866 },
938         { "866", RTL_TEXTENCODING_IBM_866 },
939         { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
940         { "IBM869", RTL_TEXTENCODING_IBM_869 },
941         { "CP869", RTL_TEXTENCODING_IBM_869 },
942         { "869", RTL_TEXTENCODING_IBM_869 },
943         { "CP-GR", RTL_TEXTENCODING_IBM_869 },
944         { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
945         { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
946         { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
947         { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
948         { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
949         { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
950         { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
951         { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
952         { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
953         { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
954         { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
955         { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
956         { "GB2312", RTL_TEXTENCODING_GB_2312 },
957         { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
958         { "BIG5", RTL_TEXTENCODING_BIG5 },
959         { "CSBIG5", RTL_TEXTENCODING_BIG5 },
960         { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
961         { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
962           RTL_TEXTENCODING_EUC_JP },
963         { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
964         { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
965         { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
966         { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
967         { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
968         { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
969         { "UTF-7", RTL_TEXTENCODING_UTF7 },
970         { "UTF-8", RTL_TEXTENCODING_UTF8 },
971         { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
972         { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
973         { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
974         { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
975         { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
976         { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
977         { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
978         { "CSUCS4", RTL_TEXTENCODING_UCS4 },
979         { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
980         { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
981 
getCharsetEncoding(sal_Char const * pBegin,sal_Char const * pEnd)982 rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
983                                               sal_Char const * pEnd)
984 {
985     for (const EncodingEntry& i : aEncodingMap)
986         if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
987             return i.m_eEncoding;
988     return RTL_TEXTENCODING_DONTKNOW;
989 }
990 
991 }
992 
993 //  INetMIME
994 
995 // static
isAtomChar(sal_uInt32 nChar)996 bool INetMIME::isAtomChar(sal_uInt32 nChar)
997 {
998     static const bool aMap[128]
999         = { false, false, false, false, false, false, false, false,
1000             false, false, false, false, false, false, false, false,
1001             false, false, false, false, false, false, false, false,
1002             false, false, false, false, false, false, false, false,
1003             false,  true, false,  true,  true,  true,  true,  true, // !"#$%&'
1004             false, false,  true,  true, false,  true, false,  true, //()*+,-./
1005              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1006              true,  true, false, false, false,  true, false,  true, //89:;<=>?
1007             false,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1008              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1009              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1010              true,  true,  true, false, false, false,  true,  true, //XYZ[\]^_
1011              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1012              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1013              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1014              true,  true,  true,  true,  true,  true,  true, false  //xyz{|}~
1015           };
1016     return rtl::isAscii(nChar) && aMap[nChar];
1017 }
1018 
1019 // static
isIMAPAtomChar(sal_uInt32 nChar)1020 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
1021 {
1022     static const bool aMap[128]
1023         = { false, false, false, false, false, false, false, false,
1024             false, false, false, false, false, false, false, false,
1025             false, false, false, false, false, false, false, false,
1026             false, false, false, false, false, false, false, false,
1027             false,  true, false,  true,  true, false,  true,  true, // !"#$%&'
1028             false, false, false,  true,  true,  true,  true,  true, //()*+,-./
1029              true,  true,  true,  true,  true,  true,  true,  true, //01234567
1030              true,  true,  true,  true,  true,  true,  true,  true, //89:;<=>?
1031              true,  true,  true,  true,  true,  true,  true,  true, //@ABCDEFG
1032              true,  true,  true,  true,  true,  true,  true,  true, //HIJKLMNO
1033              true,  true,  true,  true,  true,  true,  true,  true, //PQRSTUVW
1034              true,  true,  true,  true, false,  true,  true,  true, //XYZ[\]^_
1035              true,  true,  true,  true,  true,  true,  true,  true, //`abcdefg
1036              true,  true,  true,  true,  true,  true,  true,  true, //hijklmno
1037              true,  true,  true,  true,  true,  true,  true,  true, //pqrstuvw
1038              true,  true,  true, false,  true,  true,  true, false  //xyz{|}~
1039           };
1040     return rtl::isAscii(nChar) && aMap[nChar];
1041 }
1042 
1043 // static
equalIgnoreCase(const sal_Unicode * pBegin1,const sal_Unicode * pEnd1,const sal_Char * pString2)1044 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
1045                                const sal_Unicode * pEnd1,
1046                                const sal_Char * pString2)
1047 {
1048     DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
1049                "INetMIME::equalIgnoreCase(): Bad sequences");
1050 
1051     while (*pString2 != 0)
1052         if (pBegin1 == pEnd1
1053             || (rtl::toAsciiUpperCase(*pBegin1++)
1054                 != rtl::toAsciiUpperCase(
1055                     static_cast<unsigned char>(*pString2++))))
1056             return false;
1057     return pBegin1 == pEnd1;
1058 }
1059 
1060 // static
scanUnsigned(const sal_Unicode * & rBegin,const sal_Unicode * pEnd,bool bLeadingZeroes,sal_uInt32 & rValue)1061 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
1062                             const sal_Unicode * pEnd, bool bLeadingZeroes,
1063                             sal_uInt32 & rValue)
1064 {
1065     sal_uInt64 nTheValue = 0;
1066     const sal_Unicode * p = rBegin;
1067     for ( ; p != pEnd; ++p)
1068     {
1069         int nWeight = getWeight(*p);
1070         if (nWeight < 0)
1071             break;
1072         nTheValue = 10 * nTheValue + nWeight;
1073         if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
1074             return false;
1075     }
1076     if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1077         return false;
1078     rBegin = p;
1079     rValue = sal_uInt32(nTheValue);
1080     return true;
1081 }
1082 
1083 // static
scanContentType(OUString const & rStr,OUString * pType,OUString * pSubType,INetContentTypeParameterList * pParameters)1084 sal_Unicode const * INetMIME::scanContentType(
1085     OUString const & rStr, OUString * pType,
1086     OUString * pSubType, INetContentTypeParameterList * pParameters)
1087 {
1088     sal_Unicode const * pBegin = rStr.getStr();
1089     sal_Unicode const * pEnd = pBegin + rStr.getLength();
1090     sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1091     sal_Unicode const * pTypeBegin = p;
1092     while (p != pEnd && isTokenChar(*p))
1093     {
1094         ++p;
1095     }
1096     if (p == pTypeBegin)
1097         return nullptr;
1098     sal_Unicode const * pTypeEnd = p;
1099 
1100     p = skipLinearWhiteSpaceComment(p, pEnd);
1101     if (p == pEnd || *p++ != '/')
1102         return nullptr;
1103 
1104     p = skipLinearWhiteSpaceComment(p, pEnd);
1105     sal_Unicode const * pSubTypeBegin = p;
1106     while (p != pEnd && isTokenChar(*p))
1107     {
1108         ++p;
1109     }
1110     if (p == pSubTypeBegin)
1111         return nullptr;
1112     sal_Unicode const * pSubTypeEnd = p;
1113 
1114     if (pType != nullptr)
1115     {
1116         *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1117     }
1118     if (pSubType != nullptr)
1119     {
1120         *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1121             .toAsciiLowerCase();
1122     }
1123 
1124     return scanParameters(p, pEnd, pParameters);
1125 }
1126 
1127 // static
/** Decode the encoded words (see the grammar in the function body) that
    occur in a header field body.

    Every well-formed encoded word whose charset is known is replaced by its
    decoded text; white space between two directly adjacent encoded words is
    dropped.  Anything that does not parse as an encoded word is copied to
    the result unchanged (subject to the byte handling described in the
    body).

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1.
    // NOTE(review): the default branch of the switch near the end of the
    // loop first hands the bytes to translateUTF8Char(); only bytes it does
    // not accept reach appendISO88591().

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const sal_Char * pBegin = rBody.getStr();
    const sal_Char * pEnd = pBegin + rBody.getLength();

    OUStringBuffer sDecoded;
    // Start of the input that has not been appended to sDecoded yet:
    const sal_Char * pCopyBegin = pBegin;

    /* bStartEncodedWord = true; */
    // Right after an encoded word this marks the start of the white space
    // following it; otherwise it is kept equal to p (see the loop's end).
    // Pending input is flushed only up to here when another encoded word is
    // found, which drops white space between adjacent encoded words:
    const sal_Char * pWSPBegin = pBegin;

    for (const sal_Char * p = pBegin; p != pEnd;)
    {
        // NOTE(review): sEncodedText is never assigned in this function, so
        // the isEmpty() check further down can never succeed.
        OUString sEncodedText;
        if (*p == '=' /* && bStartEncodedWord */)
        {
            // q is a look-ahead cursor; p is only moved to q once the whole
            // encoded word has been parsed and converted successfully.
            const sal_Char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            // Parse the charset, optionally followed by "*" and a language
            // tag, up to the terminating '?':
            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                const sal_Char * pCharsetBegin = q;
                // Position of the '*' starting a language tag that is valid
                // so far, or null if there is none:
                const sal_Char * pLanguageBegin = nullptr;
                // Number of letters in the current language subtag:
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        sal_Char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                if (pLanguageBegin != nullptr)
                                {
                                    // An empty subtag invalidates the
                                    // language tag; otherwise a new subtag
                                    // starts:
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                // An empty charset is not allowed:
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // The charset name ends at the '*' of a
                                    // valid language tag, else at this '?':
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                              pCharsetBegin,
                                              pLanguageBegin == nullptr
                                              || nAlphaCount == 0 ?
                                                  q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                                       eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                // Within a language tag, only letters are
                                // allowed, at most eight per subtag:
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(
                                            static_cast<unsigned char>(cChar))
                                        || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
            }

            // Parse the encoding marker ('B' or 'Q', in either case):
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            // The '?' separating the encoding marker from the encoded text:
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Receives the decoded bytes, still in the source charset:
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    // Decode groups of four characters into up to three
                    // bytes each, until the terminating '?':
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            // Set once padding has been seen in this group:
                            bool bFinal = false;
                            // Number of bytes this group decodes to:
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                // The code below treats -2 as "not a Base 64
                                // character" and -1 as the padding character:
                                int nWeight = getBase64Weight(*q++);
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is not allowed in the
                                        // first two positions of a group:
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        // Padding from the third position
                                        // on leaves one byte, padding in
                                        // the fourth position only leaves
                                        // two:
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(sal_Char(nValue >> nShift & 0xFF));
                                // NOTE(review): q may equal pEnd here; this
                                // read presumably relies on the buffer
                                // returned by OString::getStr() being
                                // NUL-terminated -- confirm.
                                if (*q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded group must be the last one:
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    const sal_Char * pEncodedTextBegin = q;
                    // Start of the literal characters not yet copied to
                    // sText:
                    const sal_Char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            sal_uInt32 nChar = *q++;
                            switch (nChar)
                            {
                                case '=':
                                {
                                    // "=" must be followed by two
                                    // hexadecimal digits:
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(q[0]);
                                    int nDigit2 = getHexWeight(q[1]);
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    // Flush the pending literal characters,
                                    // then append the decoded byte:
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(sal_Char(nDigit1 << 4 | nDigit2));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // End of the encoded text, which must
                                    // not be empty:
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.copy(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    // An underscore stands for a space:
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(' ');
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    // Any other character is taken literally
                                    // (copied by a later flush), provided
                                    // isVisible() accepts it:
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                }
            }

            // The final '=' of the closing "?=":
            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            // Convert the decoded bytes from the word's charset; a null
            // result makes the word be treated as plain text:
            std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer
                    = convertToUnicode(sText.getStr(),
                                       sText.getStr() + sText.getLength(),
                                       eCharsetEncoding, nUnicodeSize);
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Flush the pending input up to pWSPBegin only, so that
                // white space between two adjacent encoded words is dropped:
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded.append(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                pUnicodeBuffer.reset();
                p = q;
                pCopyBegin = p;

                // Skip, but do not yet copy, white space after the encoded
                // word; it is copied later unless another encoded word
                // follows directly:
                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        if (!sEncodedText.isEmpty())
            sDecoded.append(sEncodedText);

        if (p == pEnd)
            break;

        // Not (the start of) an encoded word: consume one character.  The
        // first three cases only differ in the disabled bStartEncodedWord
        // bookkeeping; the character stays pending and is copied by a later
        // appendISO88591() call.
        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                // If translateUTF8Char() accepts the bytes starting at this
                // character, flush the pending input and append the
                // resulting character as UTF-16; otherwise the byte stays
                // pending like in the cases above:
                const sal_Char * pUTF8Begin = p - 1;
                const sal_Char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
                                      nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sal_Unicode aUTF16Buf[2];
                    sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
                    sDecoded.append(aUTF16Buf, nUTF16Len);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        pWSPBegin = p;
    }

    // Flush whatever is still pending:
    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded.makeStringAndClear();
}
1465 
1466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1467