1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <tools/debug.hxx>
21 #include <tools/stream.hxx>
22 #include <tools/solar.h>
23 #include <rtl/string.hxx>
24 #include <svtools/rtfkeywd.hxx>
25 #include <svtools/rtfout.hxx>
26
27 namespace {
28
Out_Hex(SvStream & rStream,sal_uLong nHex,sal_uInt8 nLen)29 SvStream& Out_Hex( SvStream& rStream, sal_uLong nHex, sal_uInt8 nLen )
30 {
31 char aNToABuf[] = "0000000000000000";
32
33 DBG_ASSERT( nLen < sizeof(aNToABuf), "too many places" );
34 if( nLen >= sizeof(aNToABuf) )
35 nLen = (sizeof(aNToABuf)-1);
36
37 // set pointer to end of buffer
38 char* pStr = aNToABuf + (sizeof(aNToABuf)-1);
39 for( sal_uInt8 n = 0; n < nLen; ++n )
40 {
41 *(--pStr) = static_cast<char>(nHex & 0xf ) + 48;
42 if( *pStr > '9' )
43 *pStr += 39;
44 nHex >>= 4;
45 }
46 return rStream.WriteCharPtr( pStr );
47 }
48
49 // Ideally, this function should work on (sal_uInt32) Unicode scalar values
50 // instead of (sal_Unicode) UTF-16 code units. However, at least "Rich Text
51 // Format (RTF) Specification Version 1.9.1" available at
52 // <https://www.microsoft.com/en-us/download/details.aspx?id=10725> does not
53 // look like it allows non-BMP Unicode characters >= 0x10000 in the \uN notation
54 // (it only talks about "Unicode character", but then explains how values of N
55 // greater than 32767 will be expressed as negative signed 16-bit numbers, so
56 // that smells like \uN is limited to BMP).
57 // However the "Mathematics" section has an example that shows the code point
58 // U+1D44E being encoded as UTF-16 surrogate pair "\u-10187?\u-9138?", so
59 // sal_Unicode actually works fine here.
Out_Char(SvStream & rStream,sal_Unicode c,int * pUCMode,rtl_TextEncoding eDestEnc)60 SvStream& Out_Char(SvStream& rStream, sal_Unicode c,
61 int *pUCMode, rtl_TextEncoding eDestEnc)
62 {
63 const char* pStr = nullptr;
64 switch (c)
65 {
66 case 0x1:
67 case 0x2:
68 // this are control character of our textattributes and will never be
69 // written
70 break;
71 case 0xA0:
72 rStream.WriteCharPtr( "\\~" );
73 break;
74 case 0xAD:
75 rStream.WriteCharPtr( "\\-" );
76 break;
77 case 0x2011:
78 rStream.WriteCharPtr( "\\_" );
79 break;
80 case '\n':
81 pStr = OOO_STRING_SVTOOLS_RTF_LINE;
82 break;
83 case '\t':
84 pStr = OOO_STRING_SVTOOLS_RTF_TAB;
85 break;
86 default:
87 switch(c)
88 {
89 case 149:
90 pStr = OOO_STRING_SVTOOLS_RTF_BULLET;
91 break;
92 case 150:
93 pStr = OOO_STRING_SVTOOLS_RTF_ENDASH;
94 break;
95 case 151:
96 pStr = OOO_STRING_SVTOOLS_RTF_EMDASH;
97 break;
98 case 145:
99 pStr = OOO_STRING_SVTOOLS_RTF_LQUOTE;
100 break;
101 case 146:
102 pStr = OOO_STRING_SVTOOLS_RTF_RQUOTE;
103 break;
104 case 147:
105 pStr = OOO_STRING_SVTOOLS_RTF_LDBLQUOTE;
106 break;
107 case 148:
108 pStr = OOO_STRING_SVTOOLS_RTF_RDBLQUOTE;
109 break;
110 }
111
112 if (pStr)
113 break;
114
115 switch (c)
116 {
117 case '\\':
118 case '}':
119 case '{':
120 rStream.WriteChar( '\\' ).WriteChar( char(c) );
121 break;
122 default:
123 if (c >= ' ' && c <= '~')
124 rStream.WriteChar( char(c) );
125 else
126 {
127 //If we can't convert to the dest encoding, or if
128 //it's an uncommon multibyte sequence which most
129 //readers won't be able to handle correctly, then
130 //export as unicode
131 OUString sBuf(&c, 1);
132 OString sConverted;
133 sal_uInt32 const nFlags =
134 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR |
135 RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR;
136 bool bWriteAsUnicode = !(sBuf.convertToString(&sConverted,
137 eDestEnc, nFlags))
138 || (RTL_TEXTENCODING_UTF8==eDestEnc); // #i43933# do not export UTF-8 chars in RTF;
139 if (bWriteAsUnicode)
140 {
141 (void)sBuf.convertToString(&sConverted,
142 eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
143 }
144 const sal_Int32 nLen = sConverted.getLength();
145
146 if (bWriteAsUnicode && pUCMode)
147 {
148 // then write as unicode - character
149 if (*pUCMode != nLen)
150 {
151 // #i47831# add an additional whitespace, so that
152 // "document whitespaces" are not ignored.;
153 rStream.WriteCharPtr( "\\uc" )
154 .WriteOString( OString::number(nLen) ).WriteCharPtr( " " );
155 *pUCMode = nLen;
156 }
157 rStream.WriteCharPtr( "\\u" )
158 .WriteCharPtr( OString::number(
159 static_cast<sal_Int32>(c)).getStr() );
160 }
161
162 for (sal_Int32 nI = 0; nI < nLen; ++nI)
163 {
164 rStream.WriteCharPtr( "\\'" );
165 Out_Hex(rStream, sConverted[nI], 2);
166 }
167 }
168 break;
169 }
170 break;
171 }
172
173 if (pStr)
174 rStream.WriteCharPtr( pStr ).WriteChar( ' ' );
175
176 return rStream;
177 }
178
179 }
180
Out_String(SvStream & rStream,const OUString & rStr,rtl_TextEncoding eDestEnc)181 SvStream& RTFOutFuncs::Out_String( SvStream& rStream, const OUString& rStr,
182 rtl_TextEncoding eDestEnc)
183 {
184 int nUCMode = 1;
185 for (sal_Int32 n = 0; n < rStr.getLength(); ++n)
186 Out_Char(rStream, rStr[n], &nUCMode, eDestEnc);
187 if (nUCMode != 1)
188 rStream.WriteCharPtr( "\\uc1" ).WriteCharPtr( " " ); // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.;
189 return rStream;
190 }
191
192 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
193