1 //========================================================================
2 //
3 // This file comes from pdftohtml project
4 // http://pdftohtml.sourceforge.net
5 //
6 // Copyright from:
7 // Gueorgui Ovtcharov
8 // Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
9 // Mikhail Kruk <meshko@cs.brandeis.edu>
10 //
11 //========================================================================
12 
13 //========================================================================
14 //
15 // Modified under the Poppler project - http://poppler.freedesktop.org
16 //
17 // All changes made under the Poppler project to this file are licensed
18 // under GPL version 2 or later
19 //
20 // Copyright (C) 2007, 2010, 2012, 2018, 2020 Albert Astals Cid <aacid@kde.org>
21 // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
22 // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
23 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
24 // Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
25 // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
26 // Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
27 // Copyright (C) 2012 Luis Parravicini <lparravi@gmail.com>
28 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
29 // Copyright (C) 2017 Jason Crain <jason@inspiresomeone.us>
30 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
31 // Copyright (C) 2018 Steven Boswell <ulatekh@yahoo.com>
32 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
33 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
34 // Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
35 //
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
38 //
39 //========================================================================
40 
41 #include "HtmlFonts.h"
42 #include "HtmlUtils.h"
43 #include "GlobalParams.h"
44 #include "UnicodeMap.h"
45 #include "GfxFont.h"
46 #include <cstdio>
47 
namespace {

// Family name used when a font object does not report a name.
const char *const defaultFamilyName = "Times";

// Style markers searched for in a font name, in lookup order.
const char *const styleSuffixes[] = {
    "-Regular", "-Bold", "-BoldOblique", "-BoldItalic", "-Oblique", "-Italic", "-Roman",
};

// Cuts familyName at the last occurrence of the first marker from
// styleSuffixes that occurs in it. Everything from that marker onwards is
// dropped. The string is left untouched when no marker occurs.
void removeStyleSuffix(std::string &familyName)
{
    const std::size_t suffixCount = sizeof(styleSuffixes) / sizeof(styleSuffixes[0]);
    for (std::size_t idx = 0; idx < suffixCount; ++idx) {
        const std::string::size_type cut = familyName.rfind(styleSuffixes[idx]);
        if (cut == std::string::npos) {
            continue;
        }
        // Only the first marker that matches is applied.
        familyName.erase(cut);
        break;
    }
}

}
68 
// Converts x to int after adding 0.5, i.e. truncation towards zero of x + 0.5.
// The argument is parenthesized so that an expression containing operators of
// lower precedence than '+' (for example a conditional expression) is rounded
// as a whole instead of having 0.5 added to only part of it.
#define xoutRound(x) ((int)((x) + 0.5))
// Flags declared here and defined outside this section. In this file, xml
// selects XML instead of HTML/CSS output (HtmlFilter, CSStyle) and
// fontFullName makes CSStyle emit the full font name instead of the family name.
extern bool xml;
extern bool fontFullName;
72 
HtmlFontColor(GfxRGB rgb,double opacity_)73 HtmlFontColor::HtmlFontColor(GfxRGB rgb, double opacity_)
74 {
75     r = static_cast<int>(rgb.r / 65535.0 * 255.0);
76     g = static_cast<int>(rgb.g / 65535.0 * 255.0);
77     b = static_cast<int>(rgb.b / 65535.0 * 255.0);
78     opacity = static_cast<int>(opacity_ * 255.999);
79     if (!(Ok(r) && Ok(b) && Ok(g) && Ok(opacity))) {
80         if (!globalParams->getErrQuiet())
81             fprintf(stderr, "Error : Bad color (%d,%d,%d,%d) reset to (0,0,0,255)\n", r, g, b, opacity);
82         r = 0;
83         g = 0;
84         b = 0;
85         opacity = 255;
86     }
87 }
88 
convtoX(unsigned int xcol) const89 GooString *HtmlFontColor::convtoX(unsigned int xcol) const
90 {
91     GooString *xret = new GooString();
92     char tmp;
93     unsigned int k;
94     k = (xcol / 16);
95     if (k < 10)
96         tmp = (char)('0' + k);
97     else
98         tmp = (char)('a' + k - 10);
99     xret->append(tmp);
100     k = (xcol % 16);
101     if (k < 10)
102         tmp = (char)('0' + k);
103     else
104         tmp = (char)('a' + k - 10);
105     xret->append(tmp);
106     return xret;
107 }
108 
toString() const109 GooString *HtmlFontColor::toString() const
110 {
111     GooString *tmp = new GooString("#");
112     GooString *tmpr = convtoX(r);
113     GooString *tmpg = convtoX(g);
114     GooString *tmpb = convtoX(b);
115     tmp->append(tmpr);
116     tmp->append(tmpg);
117     tmp->append(tmpb);
118     delete tmpr;
119     delete tmpg;
120     delete tmpb;
121     return tmp;
122 }
123 
HtmlFont(GfxFont * font,int _size,GfxRGB rgb,double opacity)124 HtmlFont::HtmlFont(GfxFont *font, int _size, GfxRGB rgb, double opacity)
125 {
126     color = HtmlFontColor(rgb, opacity);
127 
128     lineSize = -1;
129 
130     size = _size;
131     italic = false;
132     bold = false;
133     rotOrSkewed = false;
134 
135     if (font->isBold() || font->getWeight() >= GfxFont::W700)
136         bold = true;
137     if (font->isItalic())
138         italic = true;
139 
140     if (const GooString *fontname = font->getName()) {
141         FontName = new GooString(fontname);
142 
143         GooString fontnameLower(fontname);
144         fontnameLower.lowerCase();
145 
146         if (!bold && strstr(fontnameLower.c_str(), "bold")) {
147             bold = true;
148         }
149 
150         if (!italic && (strstr(fontnameLower.c_str(), "italic") || strstr(fontnameLower.c_str(), "oblique"))) {
151             italic = true;
152         }
153 
154         familyName = fontname->c_str();
155         removeStyleSuffix(familyName);
156     } else {
157         FontName = new GooString(defaultFamilyName);
158         familyName = defaultFamilyName;
159     }
160 
161     rotSkewMat[0] = rotSkewMat[1] = rotSkewMat[2] = rotSkewMat[3] = 0;
162 }
163 
// Copy constructor. Copies every field of x; FontName is duplicated into a
// new GooString so that each object deletes its own copy.
HtmlFont::HtmlFont(const HtmlFont &x)
{
    // Owned string: allocate a separate copy.
    FontName = new GooString(x.FontName);
    familyName = x.familyName;
    color = x.color;

    // Plain value fields.
    size = x.size;
    lineSize = x.lineSize;
    bold = x.bold;
    italic = x.italic;

    // Rotation/skew state, including the whole matrix array.
    rotOrSkewed = x.rotOrSkewed;
    memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
}
176 
~HtmlFont()177 HtmlFont::~HtmlFont()
178 {
179     delete FontName;
180 }
181 
// Copy assignment. Makes this font a copy of x and returns *this.
// Self-assignment is a no-op.
// All fields that the copy constructor copies are copied here as well,
// including the rotation/skew flag and matrix, which isEqual() compares.
HtmlFont &HtmlFont::operator=(const HtmlFont &x)
{
    if (this == &x)
        return *this;

    // Allocate the replacement name first so that FontName never points to
    // freed memory if the allocation throws.
    GooString *const nameCopy = new GooString(x.FontName);
    delete FontName;
    FontName = nameCopy;

    size = x.size;
    lineSize = x.lineSize;
    italic = x.italic;
    bold = x.bold;
    familyName = x.familyName;
    color = x.color;

    // Keep the rotation/skew state in sync with the source font.
    rotOrSkewed = x.rotOrSkewed;
    memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
    return *this;
}
196 
197 /*
198   This function is used to compare font uniquely for insertion into
199   the list of all encountered fonts
200 */
isEqual(const HtmlFont & x) const201 bool HtmlFont::isEqual(const HtmlFont &x) const
202 {
203     return (size == x.size) && (lineSize == x.lineSize) && (FontName->cmp(x.FontName) == 0) && (bold == x.bold) && (italic == x.italic) && (color.isEqual(x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed()
204             && (!isRotOrSkewed() || rot_matrices_equal(getRotMat(), x.getRotMat()));
205 }
206 
207 /*
208   This one is used to decide whether two pieces of text can be joined together
209   and therefore we don't care about bold/italics properties
210 */
isEqualIgnoreBold(const HtmlFont & x) const211 bool HtmlFont::isEqualIgnoreBold(const HtmlFont &x) const
212 {
213     return ((size == x.size) && (familyName == x.familyName) && (color.isEqual(x.getColor())));
214 }
215 
getFontName()216 GooString *HtmlFont::getFontName()
217 {
218     return new GooString(familyName);
219 }
220 
getFullName()221 GooString *HtmlFont::getFullName()
222 {
223     return new GooString(FontName);
224 }
225 
226 // this method if plain wrong todo
HtmlFilter(const Unicode * u,int uLen)227 GooString *HtmlFont::HtmlFilter(const Unicode *u, int uLen)
228 {
229     GooString *tmp = new GooString();
230     const UnicodeMap *uMap;
231     char buf[8];
232     int n;
233 
234     // get the output encoding
235     if (!(uMap = globalParams->getTextEncoding())) {
236         return tmp;
237     }
238 
239     for (int i = 0; i < uLen; ++i) {
240         // skip control characters.  W3C disallows them and they cause a warning
241         // with PHP.
242         if (u[i] <= 31 && u[i] != '\t')
243             continue;
244 
245         switch (u[i]) {
246         case '"':
247             tmp->append("&#34;");
248             break;
249         case '&':
250             tmp->append("&amp;");
251             break;
252         case '<':
253             tmp->append("&lt;");
254             break;
255         case '>':
256             tmp->append("&gt;");
257             break;
258         case ' ':
259         case '\t':
260             tmp->append(!xml && (i + 1 >= uLen || !tmp->getLength() || tmp->getChar(tmp->getLength() - 1) == ' ') ? "&#160;" : " ");
261             break;
262         default: {
263             // convert unicode to string
264             if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) {
265                 tmp->append(buf, n);
266             }
267         }
268         }
269     }
270 
271     return tmp;
272 }
273 
HtmlFontAccu()274 HtmlFontAccu::HtmlFontAccu() { }
275 
~HtmlFontAccu()276 HtmlFontAccu::~HtmlFontAccu() { }
277 
AddFont(const HtmlFont & font)278 int HtmlFontAccu::AddFont(const HtmlFont &font)
279 {
280     std::vector<HtmlFont>::iterator i;
281     for (i = accu.begin(); i != accu.end(); ++i) {
282         if (font.isEqual(*i)) {
283             return (int)(i - (accu.begin()));
284         }
285     }
286 
287     accu.push_back(font);
288     return (accu.size() - 1);
289 }
290 
291 // get CSS font definition for font #i
CSStyle(int i,int j)292 GooString *HtmlFontAccu::CSStyle(int i, int j)
293 {
294     GooString *tmp = new GooString();
295 
296     std::vector<HtmlFont>::iterator g = accu.begin();
297     g += i;
298     HtmlFont font = *g;
299     GooString *colorStr = font.getColor().toString();
300     GooString *fontName = (fontFullName ? font.getFullName() : font.getFontName());
301 
302     if (!xml) {
303         tmp->append(".ft");
304         tmp->append(std::to_string(j));
305         tmp->append(std::to_string(i));
306         tmp->append("{font-size:");
307         tmp->append(std::to_string(font.getSize()));
308         if (font.getLineSize() != -1 && font.getLineSize() != 0) {
309             tmp->append("px;line-height:");
310             tmp->append(std::to_string(font.getLineSize()));
311         }
312         tmp->append("px;font-family:");
313         tmp->append(fontName); // font.getFontName());
314         tmp->append(";color:");
315         tmp->append(colorStr);
316         if (font.getColor().getOpacity() != 1.0) {
317             tmp->append(";opacity:");
318             tmp->append(std::to_string(font.getColor().getOpacity()));
319         }
320         // if there is rotation or skew, include the matrix
321         if (font.isRotOrSkewed()) {
322             const double *const text_mat = font.getRotMat();
323             GooString matrix_str(" matrix(");
324             matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)", text_mat[0], text_mat[1], text_mat[2], text_mat[3]);
325             tmp->append(";-moz-transform:");
326             tmp->append(&matrix_str);
327             tmp->append(";-webkit-transform:");
328             tmp->append(&matrix_str);
329             tmp->append(";-o-transform:");
330             tmp->append(&matrix_str);
331             tmp->append(";-ms-transform:");
332             tmp->append(&matrix_str);
333             // Todo: 75% is a wild guess that seems to work pretty well;
334             // We probably need to calculate the real percentage
335             // Based on the characteristic baseline and bounding box of current font
336             // PDF origin is at baseline
337             tmp->append(";-moz-transform-origin: left 75%");
338             tmp->append(";-webkit-transform-origin: left 75%");
339             tmp->append(";-o-transform-origin: left 75%");
340             tmp->append(";-ms-transform-origin: left 75%");
341         }
342         tmp->append(";}");
343     }
344     if (xml) {
345         tmp->append("<fontspec id=\"");
346         tmp->append(std::to_string(i));
347         tmp->append("\" size=\"");
348         tmp->append(std::to_string(font.getSize()));
349         tmp->append("\" family=\"");
350         tmp->append(fontName);
351         tmp->append("\" color=\"");
352         tmp->append(colorStr);
353         if (font.getColor().getOpacity() != 1.0) {
354             tmp->append("\" opacity=\"");
355             tmp->append(std::to_string(font.getColor().getOpacity()));
356         }
357         tmp->append("\"/>");
358     }
359 
360     delete fontName;
361     delete colorStr;
362     return tmp;
363 }
364