1 //========================================================================
2 //
3 // This file comes from pdftohtml project
4 // http://pdftohtml.sourceforge.net
5 //
6 // Copyright from:
7 // Gueorgui Ovtcharov
8 // Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
9 // Mikhail Kruk <meshko@cs.brandeis.edu>
10 //
11 //========================================================================
12
13 //========================================================================
14 //
15 // Modified under the Poppler project - http://poppler.freedesktop.org
16 //
17 // All changes made under the Poppler project to this file are licensed
18 // under GPL version 2 or later
19 //
20 // Copyright (C) 2007, 2010, 2012, 2018, 2020 Albert Astals Cid <aacid@kde.org>
21 // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
22 // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
23 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
24 // Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
25 // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
26 // Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
27 // Copyright (C) 2012 Luis Parravicini <lparravi@gmail.com>
28 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
29 // Copyright (C) 2017 Jason Crain <jason@inspiresomeone.us>
30 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
31 // Copyright (C) 2018 Steven Boswell <ulatekh@yahoo.com>
32 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
33 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
34 // Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
35 //
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
38 //
39 //========================================================================
40
41 #include "HtmlFonts.h"
42 #include "HtmlUtils.h"
43 #include "GlobalParams.h"
44 #include "UnicodeMap.h"
45 #include "GfxFont.h"
46 #include <cstdio>
47
namespace {

// Family name used when a font carries no name of its own.
const char *const defaultFamilyName = "Times";

// Style markers looked for in a font name, tried in exactly this order.
const char *const styleSuffixes[] = {
    "-Regular", "-Bold", "-BoldOblique", "-BoldItalic", "-Oblique", "-Italic", "-Roman",
};

// Cuts familyName at the last occurrence of the first style marker (in
// styleSuffixes order) that occurs anywhere in it. Everything from that
// occurrence to the end of the string is dropped; at most one marker is
// handled per call. Names containing no marker are left untouched.
void removeStyleSuffix(std::string &familyName)
{
    const std::string::size_type suffixCount = sizeof(styleSuffixes) / sizeof(styleSuffixes[0]);

    for (std::string::size_type idx = 0; idx < suffixCount; ++idx) {
        const std::string::size_type cutAt = familyName.rfind(styleSuffixes[idx]);
        if (cutAt == std::string::npos) {
            continue;
        }
        familyName.erase(cutAt);
        break;
    }
}

}
68
// Rounds x by adding 0.5 and truncating to int. The argument is parenthesised
// so that expressions with low-precedence operators (e.g. a conditional
// expression) are rounded as a whole instead of absorbing the "+ 0.5".
#define xoutRound(x) ((int)((x) + 0.5))
// Defined in another translation unit; read in this file to choose between
// XML and HTML output.
extern bool xml;
// Defined in another translation unit; read in this file to choose between
// the full font name and the family name when emitting font definitions.
extern bool fontFullName;
72
HtmlFontColor(GfxRGB rgb,double opacity_)73 HtmlFontColor::HtmlFontColor(GfxRGB rgb, double opacity_)
74 {
75 r = static_cast<int>(rgb.r / 65535.0 * 255.0);
76 g = static_cast<int>(rgb.g / 65535.0 * 255.0);
77 b = static_cast<int>(rgb.b / 65535.0 * 255.0);
78 opacity = static_cast<int>(opacity_ * 255.999);
79 if (!(Ok(r) && Ok(b) && Ok(g) && Ok(opacity))) {
80 if (!globalParams->getErrQuiet())
81 fprintf(stderr, "Error : Bad color (%d,%d,%d,%d) reset to (0,0,0,255)\n", r, g, b, opacity);
82 r = 0;
83 g = 0;
84 b = 0;
85 opacity = 255;
86 }
87 }
88
convtoX(unsigned int xcol) const89 GooString *HtmlFontColor::convtoX(unsigned int xcol) const
90 {
91 GooString *xret = new GooString();
92 char tmp;
93 unsigned int k;
94 k = (xcol / 16);
95 if (k < 10)
96 tmp = (char)('0' + k);
97 else
98 tmp = (char)('a' + k - 10);
99 xret->append(tmp);
100 k = (xcol % 16);
101 if (k < 10)
102 tmp = (char)('0' + k);
103 else
104 tmp = (char)('a' + k - 10);
105 xret->append(tmp);
106 return xret;
107 }
108
toString() const109 GooString *HtmlFontColor::toString() const
110 {
111 GooString *tmp = new GooString("#");
112 GooString *tmpr = convtoX(r);
113 GooString *tmpg = convtoX(g);
114 GooString *tmpb = convtoX(b);
115 tmp->append(tmpr);
116 tmp->append(tmpg);
117 tmp->append(tmpb);
118 delete tmpr;
119 delete tmpg;
120 delete tmpb;
121 return tmp;
122 }
123
HtmlFont(GfxFont * font,int _size,GfxRGB rgb,double opacity)124 HtmlFont::HtmlFont(GfxFont *font, int _size, GfxRGB rgb, double opacity)
125 {
126 color = HtmlFontColor(rgb, opacity);
127
128 lineSize = -1;
129
130 size = _size;
131 italic = false;
132 bold = false;
133 rotOrSkewed = false;
134
135 if (font->isBold() || font->getWeight() >= GfxFont::W700)
136 bold = true;
137 if (font->isItalic())
138 italic = true;
139
140 if (const GooString *fontname = font->getName()) {
141 FontName = new GooString(fontname);
142
143 GooString fontnameLower(fontname);
144 fontnameLower.lowerCase();
145
146 if (!bold && strstr(fontnameLower.c_str(), "bold")) {
147 bold = true;
148 }
149
150 if (!italic && (strstr(fontnameLower.c_str(), "italic") || strstr(fontnameLower.c_str(), "oblique"))) {
151 italic = true;
152 }
153
154 familyName = fontname->c_str();
155 removeStyleSuffix(familyName);
156 } else {
157 FontName = new GooString(defaultFamilyName);
158 familyName = defaultFamilyName;
159 }
160
161 rotSkewMat[0] = rotSkewMat[1] = rotSkewMat[2] = rotSkewMat[3] = 0;
162 }
163
// Copy constructor: duplicates every field of x, giving this object its own
// heap-allocated copy of the full font name.
HtmlFont::HtmlFont(const HtmlFont &x)
{
    // Owned string: allocate a separate copy rather than sharing the pointer.
    FontName = new GooString(x.FontName);
    familyName = x.familyName;
    color = x.color;

    size = x.size;
    lineSize = x.lineSize;
    bold = x.bold;
    italic = x.italic;

    // Rotation/skew state, including the whole matrix array.
    rotOrSkewed = x.rotOrSkewed;
    memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
}
176
~HtmlFont()177 HtmlFont::~HtmlFont()
178 {
179 delete FontName;
180 }
181
// Copy assignment: makes *this a copy of x, with its own heap-allocated copy
// of the full font name. Self-assignment is a no-op.
//
// The rotation/skew flag and matrix are copied as well, matching the copy
// constructor; without them an assigned font would keep its previous
// transform, which isEqual() and the CSS output both depend on.
HtmlFont &HtmlFont::operator=(const HtmlFont &x)
{
    if (this == &x)
        return *this;

    // Allocate the replacement before releasing the old string so FontName
    // never points at freed memory if the allocation throws.
    GooString *replacementName = new GooString(x.FontName);
    delete FontName;
    FontName = replacementName;

    size = x.size;
    lineSize = x.lineSize;
    italic = x.italic;
    bold = x.bold;
    familyName = x.familyName;
    color = x.color;
    rotOrSkewed = x.rotOrSkewed;
    memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
    return *this;
}
196
197 /*
198 This function is used to compare font uniquely for insertion into
199 the list of all encountered fonts
200 */
isEqual(const HtmlFont & x) const201 bool HtmlFont::isEqual(const HtmlFont &x) const
202 {
203 return (size == x.size) && (lineSize == x.lineSize) && (FontName->cmp(x.FontName) == 0) && (bold == x.bold) && (italic == x.italic) && (color.isEqual(x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed()
204 && (!isRotOrSkewed() || rot_matrices_equal(getRotMat(), x.getRotMat()));
205 }
206
207 /*
208 This one is used to decide whether two pieces of text can be joined together
209 and therefore we don't care about bold/italics properties
210 */
isEqualIgnoreBold(const HtmlFont & x) const211 bool HtmlFont::isEqualIgnoreBold(const HtmlFont &x) const
212 {
213 return ((size == x.size) && (familyName == x.familyName) && (color.isEqual(x.getColor())));
214 }
215
getFontName()216 GooString *HtmlFont::getFontName()
217 {
218 return new GooString(familyName);
219 }
220
getFullName()221 GooString *HtmlFont::getFullName()
222 {
223 return new GooString(FontName);
224 }
225
226 // this method if plain wrong todo
HtmlFilter(const Unicode * u,int uLen)227 GooString *HtmlFont::HtmlFilter(const Unicode *u, int uLen)
228 {
229 GooString *tmp = new GooString();
230 const UnicodeMap *uMap;
231 char buf[8];
232 int n;
233
234 // get the output encoding
235 if (!(uMap = globalParams->getTextEncoding())) {
236 return tmp;
237 }
238
239 for (int i = 0; i < uLen; ++i) {
240 // skip control characters. W3C disallows them and they cause a warning
241 // with PHP.
242 if (u[i] <= 31 && u[i] != '\t')
243 continue;
244
245 switch (u[i]) {
246 case '"':
247 tmp->append(""");
248 break;
249 case '&':
250 tmp->append("&");
251 break;
252 case '<':
253 tmp->append("<");
254 break;
255 case '>':
256 tmp->append(">");
257 break;
258 case ' ':
259 case '\t':
260 tmp->append(!xml && (i + 1 >= uLen || !tmp->getLength() || tmp->getChar(tmp->getLength() - 1) == ' ') ? " " : " ");
261 break;
262 default: {
263 // convert unicode to string
264 if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) {
265 tmp->append(buf, n);
266 }
267 }
268 }
269 }
270
271 return tmp;
272 }
273
HtmlFontAccu()274 HtmlFontAccu::HtmlFontAccu() { }
275
~HtmlFontAccu()276 HtmlFontAccu::~HtmlFontAccu() { }
277
AddFont(const HtmlFont & font)278 int HtmlFontAccu::AddFont(const HtmlFont &font)
279 {
280 std::vector<HtmlFont>::iterator i;
281 for (i = accu.begin(); i != accu.end(); ++i) {
282 if (font.isEqual(*i)) {
283 return (int)(i - (accu.begin()));
284 }
285 }
286
287 accu.push_back(font);
288 return (accu.size() - 1);
289 }
290
291 // get CSS font definition for font #i
CSStyle(int i,int j)292 GooString *HtmlFontAccu::CSStyle(int i, int j)
293 {
294 GooString *tmp = new GooString();
295
296 std::vector<HtmlFont>::iterator g = accu.begin();
297 g += i;
298 HtmlFont font = *g;
299 GooString *colorStr = font.getColor().toString();
300 GooString *fontName = (fontFullName ? font.getFullName() : font.getFontName());
301
302 if (!xml) {
303 tmp->append(".ft");
304 tmp->append(std::to_string(j));
305 tmp->append(std::to_string(i));
306 tmp->append("{font-size:");
307 tmp->append(std::to_string(font.getSize()));
308 if (font.getLineSize() != -1 && font.getLineSize() != 0) {
309 tmp->append("px;line-height:");
310 tmp->append(std::to_string(font.getLineSize()));
311 }
312 tmp->append("px;font-family:");
313 tmp->append(fontName); // font.getFontName());
314 tmp->append(";color:");
315 tmp->append(colorStr);
316 if (font.getColor().getOpacity() != 1.0) {
317 tmp->append(";opacity:");
318 tmp->append(std::to_string(font.getColor().getOpacity()));
319 }
320 // if there is rotation or skew, include the matrix
321 if (font.isRotOrSkewed()) {
322 const double *const text_mat = font.getRotMat();
323 GooString matrix_str(" matrix(");
324 matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)", text_mat[0], text_mat[1], text_mat[2], text_mat[3]);
325 tmp->append(";-moz-transform:");
326 tmp->append(&matrix_str);
327 tmp->append(";-webkit-transform:");
328 tmp->append(&matrix_str);
329 tmp->append(";-o-transform:");
330 tmp->append(&matrix_str);
331 tmp->append(";-ms-transform:");
332 tmp->append(&matrix_str);
333 // Todo: 75% is a wild guess that seems to work pretty well;
334 // We probably need to calculate the real percentage
335 // Based on the characteristic baseline and bounding box of current font
336 // PDF origin is at baseline
337 tmp->append(";-moz-transform-origin: left 75%");
338 tmp->append(";-webkit-transform-origin: left 75%");
339 tmp->append(";-o-transform-origin: left 75%");
340 tmp->append(";-ms-transform-origin: left 75%");
341 }
342 tmp->append(";}");
343 }
344 if (xml) {
345 tmp->append("<fontspec id=\"");
346 tmp->append(std::to_string(i));
347 tmp->append("\" size=\"");
348 tmp->append(std::to_string(font.getSize()));
349 tmp->append("\" family=\"");
350 tmp->append(fontName);
351 tmp->append("\" color=\"");
352 tmp->append(colorStr);
353 if (font.getColor().getOpacity() != 1.0) {
354 tmp->append("\" opacity=\"");
355 tmp->append(std::to_string(font.getColor().getOpacity()));
356 }
357 tmp->append("\"/>");
358 }
359
360 delete fontName;
361 delete colorStr;
362 return tmp;
363 }
364