1 //========================================================================
2 //
3 // HtmlOutputDev.cc
4 //
5 // Copyright 1997-2002 Glyph & Cog, LLC
6 //
7 // Changed 1999-2000 by G.Ovtcharov
8 //
9 // Changed 2002 by Mikhail Kruk
10 //
11 //========================================================================
12
13 //========================================================================
14 //
15 // Modified under the Poppler project - http://poppler.freedesktop.org
16 //
17 // All changes made under the Poppler project to this file are licensed
18 // under GPL version 2 or later
19 //
20 // Copyright (C) 2005-2013, 2016-2021 Albert Astals Cid <aacid@kde.org>
21 // Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
22 // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
23 // Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
24 // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
25 // Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
26 // Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
27 // Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
28 // Copyright (C) 2010, 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
29 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
30 // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
31 // Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
32 // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
33 // Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
34 // Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
35 // Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com>
36 // Copyright (C) 2012 Pino Toscano <pino@kde.org>
37 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
38 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
39 // Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com>
40 // Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
41 // Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com>
42 // Copyright (C) 2017 Caolán McNamara <caolanm@redhat.com>
43 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
44 // Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
45 // Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de>
46 // Copyright (C) 2019, 2020 Oliver Sander <oliver.sander@tu-dresden.de>
47 // Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
48 // Copyright (C) 2021 Christopher Hasse <hasse.christopher@gmail.com>
49 //
50 // To see a description of the changes please see the Changelog file that
51 // came with your tarball or type make ChangeLog if you are building from git
52 //
53 //========================================================================
54
55 #include "config.h"
56 #include <cstdio>
57 #include <cstdlib>
58 #include <cstdarg>
59 #include <cstddef>
60 #include <cctype>
61 #include <cmath>
62 #include <iostream>
63 #include "goo/GooString.h"
64 #include "goo/gbasename.h"
65 #include "goo/gbase64.h"
66 #include "goo/gbasename.h"
67 #include "UnicodeMap.h"
68 #include "goo/gmem.h"
69 #include "Error.h"
70 #include "GfxState.h"
71 #include "Page.h"
72 #include "Annot.h"
73 #include "PNGWriter.h"
74 #include "GlobalParams.h"
75 #include "HtmlOutputDev.h"
76 #include "HtmlFonts.h"
77 #include "HtmlUtils.h"
78 #include "InMemoryFile.h"
79 #include "Outline.h"
80 #include "PDFDoc.h"
81
82 #ifdef ENABLE_LIBPNG
83 # include <png.h>
84 #endif
85
86 #define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
87
88 class HtmlImage
89 {
90 public:
HtmlImage(GooString * _fName,GfxState * state)91 HtmlImage(GooString *_fName, GfxState *state) : fName(_fName)
92 {
93 state->transform(0, 0, &xMin, &yMax);
94 state->transform(1, 1, &xMax, &yMin);
95 }
~HtmlImage()96 ~HtmlImage() { delete fName; }
97 HtmlImage(const HtmlImage &) = delete;
98 HtmlImage &operator=(const HtmlImage &) = delete;
99
100 double xMin, xMax; // image x coordinates
101 double yMin, yMax; // image y coordinates
102 GooString *fName; // image file name
103 };
104
// Reports whether x sits strictly nearer to y than to z.
// Equal distances give false.
static inline bool IS_CLOSER(float x, float y, float z)
{
    return std::fabs(x - z) > std::fabs(x - y);
}
110
// Option flags defined in another translation unit.  The notes describe
// only how the code visible in this part of the file consults them.
extern bool complexMode; // coalesce(): skip duplicate removal, require matching fonts; dump(): use the complex/XML writers
extern bool singleHtml; // dumpComplexHeaders(): append every page to one "-html.html" file
extern bool dataUrls; // not referenced in this part of the file
extern bool ignore; // dumpComplex(): when set, no background <img> is emitted
extern bool printCommands; // not referenced in this part of the file
extern bool printHtml; // not referenced in this part of the file
extern bool noframes; // dumpComplexHeaders(): write into the shared file; clear(): keep the font accumulator
extern bool stout; // not referenced in this part of the file
extern bool xml; // dump(): select dumpAsXML() over dumpComplex()
extern bool noRoundedCoordinates; // dumpAsXML(): print coordinates with %f instead of rounding
extern bool showHidden; // not referenced in this part of the file
extern bool noMerge; // coalesce(): when set, strings are never joined with a line break

// Multiplied by the string height to decide when a horizontal gap counts as
// a word break (HtmlPage::addChar() and HtmlPage::coalesce()).
extern double wordBreakThreshold;

// Gates the std::cerr traces in this file; nothing visible here sets it to true.
static bool debug = false;
// Scratch string shared by print_matrix() and print_uni_str(); each call
// replaces it, invalidating the pointer the previous call returned.
static GooString *gstr_buff0 = nullptr; // a workspace in which I format strings
128
#if 0
// Compiled out.  As written it would return a newly allocated GooString
// holding str up to and including its last SLASH character, or an empty
// GooString when str contains none.
static GooString* Dirname(GooString* str){

  char *p=str->c_str();
  int len=str->getLength();
  for (int i=len-1;i>=0;i--)
    if (*(p+i)==SLASH)
      return new GooString(p,i+1);
  return new GooString();
}
#endif
140
print_matrix(const double * mat)141 static const char *print_matrix(const double *mat)
142 {
143 delete gstr_buff0;
144
145 gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
146 return gstr_buff0->c_str();
147 }
148
print_uni_str(const Unicode * u,const unsigned uLen)149 static const char *print_uni_str(const Unicode *u, const unsigned uLen)
150 {
151 GooString *gstr_buff1 = nullptr;
152
153 delete gstr_buff0;
154
155 if (!uLen)
156 return "";
157 gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
158 for (unsigned i = 1; i < uLen; i++) {
159 if (u[i] < 0x7F) {
160 gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? static_cast<char>(u[i]) & 0xFF : '?');
161 delete gstr_buff0;
162 gstr_buff0 = gstr_buff1;
163 }
164 }
165
166 return gstr_buff0->c_str();
167 }
168
169 //------------------------------------------------------------------------
170 // HtmlString
171 //------------------------------------------------------------------------
172
HtmlString(GfxState * state,double fontSize,HtmlFontAccu * _fonts)173 HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) : fonts(_fonts)
174 {
175 GfxFont *font;
176 double x, y;
177
178 state->transform(state->getCurX(), state->getCurY(), &x, &y);
179 if ((font = state->getFont())) {
180 double ascent = font->getAscent();
181 double descent = font->getDescent();
182 if (ascent > 1.05) {
183 // printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
184 ascent = 1.05;
185 }
186 if (descent < -0.4) {
187 // printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
188 descent = -0.4;
189 }
190 yMin = y - ascent * fontSize;
191 yMax = y - descent * fontSize;
192 GfxRGB rgb;
193 state->getFillRGB(&rgb);
194 HtmlFont hfont = HtmlFont(font, static_cast<int>(fontSize), rgb, state->getFillOpacity());
195 if (isMatRotOrSkew(state->getTextMat())) {
196 double normalizedMatrix[4];
197 memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
198 // browser rotates the opposite way
199 // so flip the sign of the angle -> sin() components change sign
200 if (debug)
201 std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
202 normalizedMatrix[1] *= -1;
203 normalizedMatrix[2] *= -1;
204 if (debug)
205 std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
206 normalizeRotMat(normalizedMatrix);
207 if (debug)
208 std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
209 hfont.setRotMat(normalizedMatrix);
210 }
211 fontpos = fonts->AddFont(hfont);
212 } else {
213 // this means that the PDF file draws text without a current font,
214 // which should never happen
215 yMin = y - 0.95 * fontSize;
216 yMax = y + 0.35 * fontSize;
217 fontpos = 0;
218 }
219 if (yMin == yMax) {
220 // this is a sanity check for a case that shouldn't happen -- but
221 // if it does happen, we want to avoid dividing by zero later
222 yMin = y;
223 yMax = y + 1;
224 }
225 col = 0;
226 text = nullptr;
227 xRight = nullptr;
228 link = nullptr;
229 len = size = 0;
230 yxNext = nullptr;
231 xyNext = nullptr;
232 htext = new GooString();
233 dir = textDirUnknown;
234 }
235
~HtmlString()236 HtmlString::~HtmlString()
237 {
238 gfree(text);
239 delete htext;
240 gfree(xRight);
241 }
242
addChar(GfxState * state,double x,double y,double dx,double dy,Unicode u)243 void HtmlString::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u)
244 {
245 if (dir == textDirUnknown) {
246 // dir = UnicodeMap::getDirection(u);
247 dir = textDirLeftRight;
248 }
249
250 if (len == size) {
251 size += 16;
252 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
253 xRight = (double *)grealloc(xRight, size * sizeof(double));
254 }
255 text[len] = u;
256 if (len == 0) {
257 xMin = x;
258 }
259 xMax = xRight[len] = x + dx;
260 // printf("added char: %f %f xright = %f\n", x, dx, x+dx);
261 ++len;
262 }
263
endString()264 void HtmlString::endString()
265 {
266 if (dir == textDirRightLeft && len > 1) {
267 // printf("will reverse!\n");
268 for (int i = 0; i < len / 2; i++) {
269 Unicode ch = text[i];
270 text[i] = text[len - i - 1];
271 text[len - i - 1] = ch;
272 }
273 }
274 }
275
276 //------------------------------------------------------------------------
277 // HtmlPage
278 //------------------------------------------------------------------------
279
HtmlPage(bool rawOrderA)280 HtmlPage::HtmlPage(bool rawOrderA)
281 {
282 rawOrder = rawOrderA;
283 curStr = nullptr;
284 yxStrings = nullptr;
285 xyStrings = nullptr;
286 yxCur1 = yxCur2 = nullptr;
287 fonts = new HtmlFontAccu();
288 links = new HtmlLinks();
289 pageWidth = 0;
290 pageHeight = 0;
291 fontsPageMarker = 0;
292 DocName = nullptr;
293 firstPage = -1;
294 }
295
~HtmlPage()296 HtmlPage::~HtmlPage()
297 {
298 clear();
299 delete DocName;
300 delete fonts;
301 delete links;
302 for (auto entry : imgList) {
303 delete entry;
304 }
305 }
306
updateFont(GfxState * state)307 void HtmlPage::updateFont(GfxState *state)
308 {
309 GfxFont *font;
310 const char *name;
311 int code;
312 double w;
313
314 // adjust the font size
315 fontSize = state->getTransformedFontSize();
316 if ((font = state->getFont()) && font->getType() == fontType3) {
317 // This is a hack which makes it possible to deal with some Type 3
318 // fonts. The problem is that it's impossible to know what the
319 // base coordinate system used in the font is without actually
320 // rendering the font. This code tries to guess by looking at the
321 // width of the character 'm' (which breaks if the font is a
322 // subset that doesn't contain 'm').
323 for (code = 0; code < 256; ++code) {
324 if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
325 break;
326 }
327 }
328 if (code < 256) {
329 w = ((Gfx8BitFont *)font)->getWidth(code);
330 if (w != 0) {
331 // 600 is a generic average 'm' width -- yes, this is a hack
332 fontSize *= w / 0.6;
333 }
334 }
335 const double *fm = font->getFontMatrix();
336 if (fm[0] != 0) {
337 fontSize *= fabs(fm[3] / fm[0]);
338 }
339 }
340 }
341
beginString(GfxState * state,const GooString * s)342 void HtmlPage::beginString(GfxState *state, const GooString *s)
343 {
344 curStr = new HtmlString(state, fontSize, fonts);
345 }
346
conv()347 void HtmlPage::conv()
348 {
349 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
350 delete tmp->htext;
351 tmp->htext = HtmlFont::HtmlFilter(tmp->text, tmp->len);
352
353 int linkIndex = 0;
354 if (links->inLink(tmp->xMin, tmp->yMin, tmp->xMax, tmp->yMax, linkIndex)) {
355 tmp->link = links->getLink(linkIndex);
356 }
357 }
358 }
359
addChar(GfxState * state,double x,double y,double dx,double dy,double ox,double oy,const Unicode * u,int uLen)360 void HtmlPage::addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen)
361 {
362 double x1, y1, w1, h1, dx2, dy2;
363 int n, i;
364 state->transform(x, y, &x1, &y1);
365 n = curStr->len;
366
367 // check that new character is in the same direction as current string
368 // and is not too far away from it before adding
369 // if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
370 // XXX
371 if (debug) {
372 const double *text_mat = state->getTextMat();
373 // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
374 // sin q is zero iff there is no rotation, or 180 deg. rotation;
375 // for 180 rotation, cos q will be negative
376 if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
377 std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
378 std::cerr << "text " << print_matrix(state->getTextMat());
379 }
380 }
381 if (n > 0 && // don't start a new string, unless there is already a string
382 // TODO: the following line assumes that text is flowing left to
383 // right, which will not necessarily be the case, e.g. if rotated;
384 // It assesses whether or not two characters are close enough to
385 // be part of the same string
386 fabs(x1 - curStr->xRight[n - 1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) &&
387 // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
388 // sin q is zero iff there is no rotation, or 180 deg. rotation;
389 // for 180 rotation, cos q will be negative
390 !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat())) {
391 endString();
392 beginString(state, nullptr);
393 }
394 state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), 0, &dx2, &dy2);
395 dx -= dx2;
396 dy -= dy2;
397 state->transformDelta(dx, dy, &w1, &h1);
398 if (uLen != 0) {
399 w1 /= uLen;
400 h1 /= uLen;
401 }
402 for (i = 0; i < uLen; ++i) {
403 curStr->addChar(state, x1 + i * w1, y1 + i * h1, w1, h1, u[i]);
404 }
405 }
406
// Finishes the current string and links it into the y-major list
// (yxStrings / yxNext).  Empty strings are deleted instead.  On return
// curStr is always null.
//
// yxCur1 caches the most recently inserted string and yxCur2 the node that
// followed the insertion point found by the last full scan; together they
// let consecutive strings be inserted without rescanning the list.
void HtmlPage::endString()
{
    HtmlString *p1, *p2;
    double h, y1, y2;

    // throw away zero-length strings -- they don't have valid xMin/xMax
    // values, and they're useless anyway
    if (curStr->len == 0) {
        delete curStr;
        curStr = nullptr;
        return;
    }

    curStr->endString();

#if 0 //~tmp
  if (curStr->yMax - curStr->yMin > 20) {
    delete curStr;
    curStr = NULL;
    return;
  }
#endif

    // insert string in y-major list
    // y1 and y2 are reference heights 50% and 80% of the way down the string
    h = curStr->yMax - curStr->yMin;
    y1 = curStr->yMin + 0.5 * h;
    y2 = curStr->yMin + 0.8 * h;
    if (rawOrder) {
        // raw order: place directly after the previously inserted string
        p1 = yxCur1;
        p2 = nullptr;
    } else if ((!yxCur1 || (y1 >= yxCur1->yMin && (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && (!yxCur2 || (y1 < yxCur2->yMin || (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
        // fast path: the new string sorts after yxCur1 and before yxCur2,
        // so the cached insertion point can be reused
        p1 = yxCur1;
        p2 = yxCur2;
    } else {
        // slow path: scan from the head for the first node the new string
        // sorts before; p1 trails one node behind p2
        for (p1 = nullptr, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
            if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
                break;
        }
        yxCur2 = p2;
    }
    yxCur1 = curStr;
    // splice curStr between p1 and p2 (p1 == nullptr means new list head)
    if (p1)
        p1->yxNext = curStr;
    else
        yxStrings = curStr;
    curStr->yxNext = p2;
    curStr = nullptr;
}
455
// Returns a pointer to the last occurrence of ss inside s, or nullptr when
// ss does not occur.  An empty ss matches at the end of s, so the
// terminating NUL of s is returned in that case.
static const char *strrstr(const char *s, const char *ss)
{
    // An empty needle matches at every position; stepping one past each
    // match would walk beyond the terminator, so answer directly.
    if (*ss == '\0') {
        while (*s != '\0') {
            ++s;
        }
        return s;
    }

    const char *last = nullptr;
    for (const char *hit = strstr(s, ss); hit != nullptr; hit = strstr(hit + 1, ss)) {
        last = hit;
    }
    return last;
}
464
CloseTags(GooString * htext,bool & finish_a,bool & finish_italic,bool & finish_bold)465 static void CloseTags(GooString *htext, bool &finish_a, bool &finish_italic, bool &finish_bold)
466 {
467 const char *last_italic = finish_italic && (finish_bold || finish_a) ? strrstr(htext->c_str(), "<i>") : nullptr;
468 const char *last_bold = finish_bold && (finish_italic || finish_a) ? strrstr(htext->c_str(), "<b>") : nullptr;
469 const char *last_a = finish_a && (finish_italic || finish_bold) ? strrstr(htext->c_str(), "<a ") : nullptr;
470 if (finish_a && (finish_italic || finish_bold) && last_a > (last_italic > last_bold ? last_italic : last_bold)) {
471 htext->append("</a>", 4);
472 finish_a = false;
473 }
474 if (finish_italic && finish_bold && last_italic > last_bold) {
475 htext->append("</i>", 4);
476 finish_italic = false;
477 }
478 if (finish_bold)
479 htext->append("</b>", 4);
480 if (finish_italic)
481 htext->append("</i>", 4);
482 if (finish_a)
483 htext->append("</a>");
484 }
485
// Strings are lines of text;
// This function aims to combine strings into lines and paragraphs if !noMerge
// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect
//
// Works on the y-major list (yxStrings) in place.  While merging it also
// builds each surviving string's htext: link start tags and <b>/<i> are
// inserted or appended as fonts and links change, and CloseTags() closes them
// when a string is finished.  Merged-away strings are deleted.
void HtmlPage::coalesce()
{
    HtmlString *str1, *str2;
    double space, horSpace, vertSpace, vertOverlap;
    bool addSpace, addLineBreak;
    int n, i;
    double curX, curY;

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%f..%f y=%f..%f size=%2d '",
	   str1->xMin, str1->xMax, str1->yMin, str1->yMax,
	   (int)(str1->yMax - str1->yMin));
    for (i = 0; i < str1->len; ++i) {
      fputc(str1->text[i] & 0xff, stdout);
    }
    printf("'\n");
  }
  printf("\n------------------------------------------------------------\n\n");
#endif
    str1 = yxStrings;

    // nothing to do for a page without text
    if (!str1)
        return;

    //----- discard duplicated text (fake boldface, drop shadows)
    if (!complexMode) { /* if not in complex mode get rid of duplicate strings */
        HtmlString *str3;
        bool found;
        while (str1) {
            double size = str1->yMax - str1->yMin;
            double xLimit = str1->xMin + size;
            found = false;
            // look ahead only while candidates start within one string height
            // to the right of str1; str2 trails str3 so str3 can be unlinked
            for (str2 = str1, str3 = str1->yxNext; str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) {
                // duplicate: same code points and nearly the same box
                if (str3->len == str1->len && !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && fabs(str3->yMin - str1->yMin) < size * 0.2 && fabs(str3->yMax - str1->yMax) < size * 0.2
                    && fabs(str3->xMax - str1->xMax) < size * 0.1) {
                    found = true;
                    // printf("found duplicate!\n");
                    break;
                }
            }
            if (found) {
                // unlink and drop the duplicate, then re-check str1 against
                // the remaining candidates (str1 is deliberately not advanced)
                str2->xyNext = str3->xyNext;
                str2->yxNext = str3->yxNext;
                delete str3;
            } else {
                str1 = str1->yxNext;
            }
        }
    } /*- !complexMode */

    str1 = yxStrings;

    // open the tags for the very first string
    const HtmlFont *hfont1 = getFont(str1);
    if (hfont1->isBold())
        str1->htext->insert(0, "<b>", 3);
    if (hfont1->isItalic())
        str1->htext->insert(0, "<i>", 3);
    if (str1->getLink() != nullptr) {
        GooString *ls = str1->getLink()->getLinkStart();
        str1->htext->insert(0, ls);
        delete ls;
    }
    // curX/curY remember where the (possibly merged) string originally
    // started; they are written back once the string is complete
    curX = str1->xMin;
    curY = str1->yMin;

    while (str1 && (str2 = str1->yxNext)) {
        const HtmlFont *hfont2 = getFont(str2);
        space = str1->yMax - str1->yMin; // the height of the font's bounding box
        horSpace = str2->xMin - str1->xMax;
        // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
        addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
        vertSpace = str2->yMin - str1->yMax;

        // printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);

        // vertical overlap of the two boxes, 0 when neither edge of str2
        // falls inside str1
        if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) {
            vertOverlap = str1->yMax - str2->yMin;
        } else if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) {
            vertOverlap = str2->yMax - str1->yMin;
        } else {
            vertOverlap = 0;
        }

        // Combine strings if:
        //  They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
        //  1.  They appear to be part of the same line of text
        //  2.  They appear to be subsequent lines of a paragraph
        //  We assume (1) or (2) above, respectively, based on:
        //  (1)  strings overlap vertically AND
        //       horizontal space between end of str1 and start of str2 is consistent with a single space or less;
        //       when rawOrder, the strings have to overlap vertically by at least 50%
        //  (2)  Strings flow down the page, but the space between them is not too great, and they are lined up on the left
        if (((((rawOrder && vertOverlap > 0.5 * space) || (!rawOrder && str2->yMin < str1->yMax)) && (horSpace > -0.5 * space && horSpace < space)) || (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak))
            && (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not matter
            str1->dir == str2->dir // text direction the same
        ) {
            // printf("yes\n");
            // n: number of code points the merged string will hold
            n = str1->len + str2->len;
            if ((addSpace = horSpace > wordBreakThreshold * space)) {
                ++n;
            }
            if (addLineBreak) {
                ++n;
            }

            // round the capacity up to a multiple of 16 and grow both arrays
            str1->size = (n + 15) & ~15;
            str1->text = (Unicode *)grealloc(str1->text, str1->size * sizeof(Unicode));
            str1->xRight = (double *)grealloc(str1->xRight, str1->size * sizeof(double));
            if (addSpace) {
                str1->text[str1->len] = 0x20;
                // NOTE(review): both arms of this conditional look identical
                // here -- confirm the non-XML arm holds the intended
                // separator (presumably a non-breaking space).
                str1->htext->append(xml ? " " : " ");
                str1->xRight[str1->len] = str2->xMin;
                ++str1->len;
            }
            if (addLineBreak) {
                str1->text[str1->len] = '\n';
                str1->htext->append("<br/>");
                str1->xRight[str1->len] = str2->xMin;
                ++str1->len;
                str1->yMin = str2->yMin;
                str1->yMax = str2->yMax;
                str1->xMax = str2->xMax;
                // the merged paragraph needs a font entry whose line size
                // matches the actual line pitch
                int fontLineSize = hfont1->getLineSize();
                int curLineSize = (int)(vertSpace + space);
                if (curLineSize != fontLineSize) {
                    HtmlFont *newfnt = new HtmlFont(*hfont1);
                    newfnt->setLineSize(curLineSize);
                    str1->fontpos = fonts->AddFont(*newfnt);
                    delete newfnt;
                    hfont1 = getFont(str1);
                    // we have to reget hfont2 because its location could have
                    // changed on resize
                    hfont2 = getFont(str2);
                }
            }
            // copy str2's code points and right edges behind str1's
            for (i = 0; i < str2->len; ++i) {
                str1->text[str1->len] = str2->text[i];
                str1->xRight[str1->len] = str2->xRight[i];
                ++str1->len;
            }

            /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
            const HtmlLink *hlink1 = str1->getLink();
            const HtmlLink *hlink2 = str2->getLink();
            bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
            bool finish_a = switch_links && hlink1 != nullptr;
            // closing an outer tag forces the tags nested inside it to close too
            bool finish_italic = hfont1->isItalic() && (!hfont2->isItalic() || finish_a);
            bool finish_bold = hfont1->isBold() && (!hfont2->isBold() || finish_a || finish_italic);
            CloseTags(str1->htext, finish_a, finish_italic, finish_bold);
            if (switch_links && hlink2 != nullptr) {
                GooString *ls = hlink2->getLinkStart();
                str1->htext->append(ls);
                delete ls;
            }
            // (re)open italic/bold for str2 when they were off or just closed
            if ((!hfont1->isItalic() || finish_italic) && hfont2->isItalic())
                str1->htext->append("<i>", 3);
            if ((!hfont1->isBold() || finish_bold) && hfont2->isBold())
                str1->htext->append("<b>", 3);

            str1->htext->append(str2->htext);
            // str1 now contains href for link of str2 (if it is defined)
            str1->link = str2->link;
            hfont1 = hfont2;
            if (str2->xMax > str1->xMax) {
                str1->xMax = str2->xMax;
            }
            if (str2->yMax > str1->yMax) {
                str1->yMax = str2->yMax;
            }
            // str2 has been absorbed: unlink and delete it, keep str1 current
            str1->yxNext = str2->yxNext;
            delete str2;
        } else { // keep strings separate
            // printf("no\n");
            bool finish_a = str1->getLink() != nullptr;
            bool finish_bold = hfont1->isBold();
            bool finish_italic = hfont1->isItalic();
            CloseTags(str1->htext, finish_a, finish_italic, finish_bold);

            // restore the original start position of the finished string,
            // then move on and open the tags for str2
            str1->xMin = curX;
            str1->yMin = curY;
            str1 = str2;
            curX = str1->xMin;
            curY = str1->yMin;
            hfont1 = hfont2;
            if (hfont1->isBold())
                str1->htext->insert(0, "<b>", 3);
            if (hfont1->isItalic())
                str1->htext->insert(0, "<i>", 3);
            if (str1->getLink() != nullptr) {
                GooString *ls = str1->getLink()->getLinkStart();
                str1->htext->insert(0, ls);
                delete ls;
            }
        }
    }
    // finish the last string the same way
    str1->xMin = curX;
    str1->yMin = curY;

    bool finish_bold = hfont1->isBold();
    bool finish_italic = hfont1->isItalic();
    bool finish_a = str1->getLink() != nullptr;
    CloseTags(str1->htext, finish_a, finish_italic, finish_bold);

#if 0 //~ for debugging
  for (str1 = yxStrings; str1; str1 = str1->yxNext) {
    printf("x=%3d..%3d y=%3d..%3d size=%2d ",
	   (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
	   (int)(str1->yMax - str1->yMin));
    printf("'%s'\n", str1->htext->c_str());
  }
  printf("\n------------------------------------------------------------\n\n");
#endif
}
703
dumpAsXML(FILE * f,int page)704 void HtmlPage::dumpAsXML(FILE *f, int page)
705 {
706 fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
707 fprintf(f, " top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight, pageWidth);
708
709 for (int i = fontsPageMarker; i < fonts->size(); i++) {
710 GooString *fontCSStyle = fonts->CSStyle(i);
711 fprintf(f, "\t%s\n", fontCSStyle->c_str());
712 delete fontCSStyle;
713 }
714
715 for (auto ptr : imgList) {
716 auto img = static_cast<HtmlImage *>(ptr);
717 if (!noRoundedCoordinates) {
718 fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
719 fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
720 } else {
721 fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
722 fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
723 }
724 fprintf(f, "src=\"%s\"/>\n", img->fName->c_str());
725 delete img;
726 }
727 imgList.clear();
728
729 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
730 if (tmp->htext) {
731 if (!noRoundedCoordinates) {
732 fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
733 fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
734 } else {
735 fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
736 fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
737 }
738 fprintf(f, "font=\"%d\">", tmp->fontpos);
739 fputs(tmp->htext->c_str(), f);
740 fputs("</text>\n", f);
741 }
742 }
743 fputs("</page>\n", f);
744 }
745
// Writes a <style> element to f that defines the .xflip, .yflip and .xyflip
// classes used to mirror images.
//
// Image flip/flop CSS source:
// http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
// tested in Chrome, Fx (Linux) and IE9 (W7)
static void printCSS(FILE *f)
{
    static const char css[] = "<style type=\"text/css\">\n"
                              "<!--\n"
                              ".xflip {\n"
                              " -moz-transform: scaleX(-1);\n"
                              " -webkit-transform: scaleX(-1);\n"
                              " -o-transform: scaleX(-1);\n"
                              " transform: scaleX(-1);\n"
                              " filter: fliph;\n"
                              "}\n"
                              ".yflip {\n"
                              " -moz-transform: scaleY(-1);\n"
                              " -webkit-transform: scaleY(-1);\n"
                              " -o-transform: scaleY(-1);\n"
                              " transform: scaleY(-1);\n"
                              " filter: flipv;\n"
                              "}\n"
                              ".xyflip {\n"
                              " -moz-transform: scaleX(-1) scaleY(-1);\n"
                              " -webkit-transform: scaleX(-1) scaleY(-1);\n"
                              " -o-transform: scaleX(-1) scaleY(-1);\n"
                              " transform: scaleX(-1) scaleY(-1);\n"
                              " filter: fliph + flipv;\n"
                              "}\n"
                              "-->\n"
                              "</style>\n";

    // the text contains no embedded NUL, so fputs emits exactly sizeof(css) - 1 bytes
    fputs(css, f);
}
805
dumpComplexHeaders(FILE * const file,FILE * & pageFile,int page)806 int HtmlPage::dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page)
807 {
808
809 if (!noframes) {
810 const std::string pgNum = std::to_string(page);
811 std::string pageFileName(DocName->toStr());
812 if (!singleHtml) {
813 pageFileName += '-' + pgNum + ".html";
814 pageFile = fopen(pageFileName.c_str(), "w");
815 } else {
816 pageFileName += "-html.html";
817 pageFile = fopen(pageFileName.c_str(), "a");
818 }
819
820 if (!pageFile) {
821 error(errIO, -1, "Couldn't open html file '{0:s}'", pageFileName.c_str());
822 return 1;
823 }
824
825 if (!singleHtml)
826 fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
827 else
828 fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, pageFileName.c_str());
829
830 const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
831 if (!singleHtml)
832 fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
833 else
834 fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding.c_str());
835 } else {
836 pageFile = file;
837 fprintf(pageFile, "<!-- Page %d -->\n", page);
838 fprintf(pageFile, "<a name=\"%d\"></a>\n", page);
839 }
840
841 return 0;
842 }
843
dumpComplex(FILE * file,int page,const std::vector<std::string> & backgroundImages)844 void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string> &backgroundImages)
845 {
846 FILE *pageFile;
847
848 if (firstPage == -1)
849 firstPage = page;
850
851 if (dumpComplexHeaders(file, pageFile, page)) {
852 error(errIO, -1, "Couldn't write headers.");
853 return;
854 }
855
856 fputs("<style type=\"text/css\">\n<!--\n", pageFile);
857 fputs("\tp {margin: 0; padding: 0;}", pageFile);
858 for (int i = fontsPageMarker; i != fonts->size(); i++) {
859 GooString *fontCSStyle;
860 if (!singleHtml)
861 fontCSStyle = fonts->CSStyle(i);
862 else
863 fontCSStyle = fonts->CSStyle(i, page);
864 fprintf(pageFile, "\t%s\n", fontCSStyle->c_str());
865 delete fontCSStyle;
866 }
867
868 fputs("-->\n</style>\n", pageFile);
869
870 if (!noframes) {
871 fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n", pageFile);
872 }
873
874 fprintf(pageFile, "<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", page, pageWidth, pageHeight);
875
876 if (!ignore && (size_t)(page - firstPage) < backgroundImages.size()) {
877 fprintf(pageFile, "<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n", pageWidth, pageHeight, backgroundImages[page - firstPage].c_str());
878 }
879
880 for (HtmlString *tmp1 = yxStrings; tmp1; tmp1 = tmp1->yxNext) {
881 if (tmp1->htext) {
882 fprintf(pageFile, "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", xoutRound(tmp1->yMin), xoutRound(tmp1->xMin));
883 if (!singleHtml) {
884 fputc('0', pageFile);
885 } else {
886 fprintf(pageFile, "%d", page);
887 }
888 fprintf(pageFile, "%d\">", tmp1->fontpos);
889 fputs(tmp1->htext->c_str(), pageFile);
890 fputs("</p>\n", pageFile);
891 }
892 }
893
894 fputs("</div>\n", pageFile);
895
896 if (!noframes) {
897 fputs("</body>\n</html>\n", pageFile);
898 fclose(pageFile);
899 }
900 }
901
dump(FILE * f,int pageNum,const std::vector<std::string> & backgroundImages)902 void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages)
903 {
904 if (complexMode || singleHtml) {
905 if (xml)
906 dumpAsXML(f, pageNum);
907 if (!xml)
908 dumpComplex(f, pageNum, backgroundImages);
909 } else {
910 fprintf(f, "<a name=%d></a>", pageNum);
911 // Loop over the list of image names on this page
912 for (auto ptr : imgList) {
913 auto img = static_cast<HtmlImage *>(ptr);
914
915 // see printCSS() for class names
916 const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
917 int style_index = 0;
918 if (img->xMin > img->xMax)
919 style_index += 1; // xFlip
920 if (img->yMin > img->yMax)
921 style_index += 2; // yFlip
922
923 fprintf(f, "<img%s src=\"%s\"/><br/>\n", styles[style_index], img->fName->c_str());
924 delete img;
925 }
926 imgList.clear();
927
928 GooString *str;
929 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
930 if (tmp->htext) {
931 str = new GooString(tmp->htext);
932 fputs(str->c_str(), f);
933 delete str;
934 fputs("<br/>\n", f);
935 }
936 }
937 fputs("<hr/>\n", f);
938 }
939 }
940
clear()941 void HtmlPage::clear()
942 {
943 HtmlString *p1, *p2;
944
945 if (curStr) {
946 delete curStr;
947 curStr = nullptr;
948 }
949 for (p1 = yxStrings; p1; p1 = p2) {
950 p2 = p1->yxNext;
951 delete p1;
952 }
953 yxStrings = nullptr;
954 xyStrings = nullptr;
955 yxCur1 = yxCur2 = nullptr;
956
957 if (!noframes) {
958 delete fonts;
959 fonts = new HtmlFontAccu();
960 fontsPageMarker = 0;
961 } else {
962 fontsPageMarker = fonts->size();
963 }
964
965 delete links;
966 links = new HtmlLinks();
967 }
968
setDocName(const char * fname)969 void HtmlPage::setDocName(const char *fname)
970 {
971 DocName = new GooString(fname);
972 }
973
addImage(GooString * fname,GfxState * state)974 void HtmlPage::addImage(GooString *fname, GfxState *state)
975 {
976 HtmlImage *img = new HtmlImage(fname, state);
977 imgList.push_back(img);
978 }
979
980 //------------------------------------------------------------------------
981 // HtmlMetaVar
982 //------------------------------------------------------------------------
983
HtmlMetaVar(const char * _name,const char * _content)984 HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
985 {
986 name = new GooString(_name);
987 content = new GooString(_content);
988 }
989
~HtmlMetaVar()990 HtmlMetaVar::~HtmlMetaVar()
991 {
992 delete name;
993 delete content;
994 }
995
toString() const996 GooString *HtmlMetaVar::toString() const
997 {
998 GooString *result = new GooString("<meta name=\"");
999 result->append(name);
1000 result->append("\" content=\"");
1001 result->append(content);
1002 result->append("\"/>");
1003 return result;
1004 }
1005
1006 //------------------------------------------------------------------------
1007 // HtmlOutputDev
1008 //------------------------------------------------------------------------
1009
// Pairs of { encoding name, name to write into the HTML charset attribute }.
// The table is terminated by a { nullptr, nullptr } sentinel row.
static const char *HtmlEncodings[][2] = {
    { "Latin1", "ISO-8859-1" },
    { nullptr, nullptr },
};
1011
mapEncodingToHtml(const std::string & encoding)1012 std::string HtmlOutputDev::mapEncodingToHtml(const std::string &encoding)
1013 {
1014 for (int i = 0; HtmlEncodings[i][0] != nullptr; i++) {
1015 if (encoding == HtmlEncodings[i][0]) {
1016 return HtmlEncodings[i][1];
1017 }
1018 }
1019 return encoding;
1020 }
1021
doFrame(int firstPage)1022 void HtmlOutputDev::doFrame(int firstPage)
1023 {
1024 GooString *fName = new GooString(Docname);
1025 fName->append(".html");
1026
1027 if (!(fContentsFrame = fopen(fName->c_str(), "w"))) {
1028 error(errIO, -1, "Couldn't open html file '{0:t}'", fName);
1029 delete fName;
1030 return;
1031 }
1032
1033 delete fName;
1034
1035 const std::string baseName = gbasename(Docname->c_str());
1036 fputs(DOCTYPE, fContentsFrame);
1037 fputs("\n<html>", fContentsFrame);
1038 fputs("\n<head>", fContentsFrame);
1039 fprintf(fContentsFrame, "\n<title>%s</title>", docTitle->c_str());
1040 const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1041 fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
1042 dumpMetaVars(fContentsFrame);
1043 fprintf(fContentsFrame, "</head>\n");
1044 fputs("<frameset cols=\"100,*\">\n", fContentsFrame);
1045 fprintf(fContentsFrame, "<frame name=\"links\" src=\"%s_ind.html\"/>\n", baseName.c_str());
1046 fputs("<frame name=\"contents\" src=", fContentsFrame);
1047 if (complexMode)
1048 fprintf(fContentsFrame, "\"%s-%d.html\"", baseName.c_str(), firstPage);
1049 else
1050 fprintf(fContentsFrame, "\"%ss.html\"", baseName.c_str());
1051
1052 fputs("/>\n</frameset>\n</html>\n", fContentsFrame);
1053
1054 fclose(fContentsFrame);
1055 }
1056
HtmlOutputDev(Catalog * catalogA,const char * fileName,const char * title,const char * author,const char * keywords,const char * subject,const char * date,bool rawOrderA,int firstPage,bool outline)1057 HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrderA, int firstPage, bool outline)
1058 {
1059 catalog = catalogA;
1060 fContentsFrame = nullptr;
1061 page = nullptr;
1062 docTitle = new GooString(title);
1063 pages = nullptr;
1064 dumpJPEG = true;
1065 // write = true;
1066 rawOrder = rawOrderA;
1067 this->doOutline = outline;
1068 ok = false;
1069 // this->firstPage = firstPage;
1070 // pageNum=firstPage;
1071 // open file
1072 needClose = false;
1073 pages = new HtmlPage(rawOrder);
1074
1075 glMetaVars.push_back(new HtmlMetaVar("generator", "pdftohtml 0.36"));
1076 if (author)
1077 glMetaVars.push_back(new HtmlMetaVar("author", author));
1078 if (keywords)
1079 glMetaVars.push_back(new HtmlMetaVar("keywords", keywords));
1080 if (date)
1081 glMetaVars.push_back(new HtmlMetaVar("date", date));
1082 if (subject)
1083 glMetaVars.push_back(new HtmlMetaVar("subject", subject));
1084
1085 maxPageWidth = 0;
1086 maxPageHeight = 0;
1087
1088 pages->setDocName(fileName);
1089 Docname = new GooString(fileName);
1090
1091 // for non-xml output (complex or simple) with frames generate the left frame
1092 if (!xml && !noframes) {
1093 if (!singleHtml) {
1094 GooString *left = new GooString(fileName);
1095 left->append("_ind.html");
1096
1097 doFrame(firstPage);
1098
1099 if (!(fContentsFrame = fopen(left->c_str(), "w"))) {
1100 error(errIO, -1, "Couldn't open html file '{0:t}'", left);
1101 delete left;
1102 return;
1103 }
1104 delete left;
1105 fputs(DOCTYPE, fContentsFrame);
1106 fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);
1107
1108 if (doOutline) {
1109 fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", gbasename(Docname->c_str()).c_str(), complexMode ? "-outline.html" : "s.html#outline");
1110 }
1111 }
1112 if (!complexMode) { /* not in complex mode */
1113
1114 GooString *right = new GooString(fileName);
1115 right->append("s.html");
1116
1117 if (!(page = fopen(right->c_str(), "w"))) {
1118 error(errIO, -1, "Couldn't open html file '{0:t}'", right);
1119 delete right;
1120 return;
1121 }
1122 delete right;
1123 fputs(DOCTYPE, page);
1124 fputs("<html>\n<head>\n<title></title>\n", page);
1125 printCSS(page);
1126 fputs("</head>\n<body>\n", page);
1127 }
1128 }
1129
1130 if (noframes) {
1131 if (stout)
1132 page = stdout;
1133 else {
1134 GooString *right = new GooString(fileName);
1135 if (!xml)
1136 right->append(".html");
1137 if (xml)
1138 right->append(".xml");
1139 if (!(page = fopen(right->c_str(), "w"))) {
1140 error(errIO, -1, "Couldn't open html file '{0:t}'", right);
1141 delete right;
1142 return;
1143 }
1144 delete right;
1145 }
1146
1147 const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
1148 if (xml) {
1149 fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding.c_str());
1150 fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
1151 fprintf(page, "<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
1152 } else {
1153 fprintf(page, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->c_str());
1154
1155 fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
1156
1157 dumpMetaVars(page);
1158 printCSS(page);
1159 fprintf(page, "</head>\n");
1160 fprintf(page, "<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
1161 }
1162 }
1163 ok = true;
1164 }
1165
~HtmlOutputDev()1166 HtmlOutputDev::~HtmlOutputDev()
1167 {
1168 delete Docname;
1169 delete docTitle;
1170
1171 for (auto entry : glMetaVars) {
1172 delete entry;
1173 }
1174
1175 if (fContentsFrame) {
1176 fputs("</body>\n</html>\n", fContentsFrame);
1177 fclose(fContentsFrame);
1178 }
1179 if (page != nullptr) {
1180 if (xml) {
1181 fputs("</pdf2xml>\n", page);
1182 fclose(page);
1183 } else if (!complexMode || xml || noframes) {
1184 fputs("</body>\n</html>\n", page);
1185 fclose(page);
1186 }
1187 }
1188 if (pages)
1189 delete pages;
1190 }
1191
startPage(int pageNumA,GfxState * state,XRef * xref)1192 void HtmlOutputDev::startPage(int pageNumA, GfxState *state, XRef *xref)
1193 {
1194 #if 0
1195 if (mode&&!xml){
1196 if (write){
1197 write=false;
1198 GooString* fname=Dirname(Docname);
1199 fname->append("image.log");
1200 if((tin=fopen(getFileNameFromPath(fname->c_str(),fname->getLength()),"w"))==NULL){
1201 printf("Error : can not open %s",fname);
1202 exit(1);
1203 }
1204 delete fname;
1205 // if(state->getRotation()!=0)
1206 // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
1207 // else
1208 fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
1209 }
1210 }
1211 #endif
1212
1213 pageNum = pageNumA;
1214 const std::string str = gbasename(Docname->c_str());
1215 pages->clear();
1216 if (!noframes) {
1217 if (fContentsFrame) {
1218 if (complexMode)
1219 fprintf(fContentsFrame, "<a href=\"%s-%d.html\"", str.c_str(), pageNum);
1220 else
1221 fprintf(fContentsFrame, "<a href=\"%ss.html#%d\"", str.c_str(), pageNum);
1222 fprintf(fContentsFrame, " target=\"contents\" >Page %d</a><br/>\n", pageNum);
1223 }
1224 }
1225
1226 pages->pageWidth = static_cast<int>(state->getPageWidth());
1227 pages->pageHeight = static_cast<int>(state->getPageHeight());
1228 }
1229
endPage()1230 void HtmlOutputDev::endPage()
1231 {
1232 std::unique_ptr<Links> linksList = docPage->getLinks();
1233 for (int i = 0; i < linksList->getNumLinks(); ++i) {
1234 doProcessLink(linksList->getLink(i));
1235 }
1236
1237 pages->conv();
1238 pages->coalesce();
1239 pages->dump(page, pageNum, backgroundImages);
1240
1241 // I don't yet know what to do in the case when there are pages of different
1242 // sizes and we want complex output: running ghostscript many times
1243 // seems very inefficient. So for now I'll just use last page's size
1244 maxPageWidth = pages->pageWidth;
1245 maxPageHeight = pages->pageHeight;
1246
1247 // if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
1248 if (!stout && !globalParams->getErrQuiet())
1249 printf("Page-%d\n", (pageNum));
1250 }
1251
addBackgroundImage(const std::string & img)1252 void HtmlOutputDev::addBackgroundImage(const std::string &img)
1253 {
1254 backgroundImages.push_back(img);
1255 }
1256
updateFont(GfxState * state)1257 void HtmlOutputDev::updateFont(GfxState *state)
1258 {
1259 pages->updateFont(state);
1260 }
1261
beginString(GfxState * state,const GooString * s)1262 void HtmlOutputDev::beginString(GfxState *state, const GooString *s)
1263 {
1264 pages->beginString(state, s);
1265 }
1266
endString(GfxState * state)1267 void HtmlOutputDev::endString(GfxState *state)
1268 {
1269 pages->endString();
1270 }
1271
drawChar(GfxState * state,double x,double y,double dx,double dy,double originX,double originY,CharCode code,int,const Unicode * u,int uLen)1272 void HtmlOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int /*nBytes*/, const Unicode *u, int uLen)
1273 {
1274 if (!showHidden && (state->getRender() & 3) == 3) {
1275 return;
1276 }
1277 pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
1278 }
1279
drawJpegImage(GfxState * state,Stream * str)1280 void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
1281 {
1282 InMemoryFile ims;
1283 FILE *f1 = nullptr;
1284 int c;
1285
1286 // open the image file
1287 GooString *fName = createImageFileName("jpg");
1288 f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
1289 if (!f1) {
1290 error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
1291 delete fName;
1292 return;
1293 }
1294
1295 // initialize stream
1296 str = str->getNextStream();
1297 str->reset();
1298
1299 // copy the stream
1300 while ((c = str->getChar()) != EOF)
1301 fputc(c, f1);
1302
1303 fclose(f1);
1304
1305 if (dataUrls) {
1306 delete fName;
1307 fName = new GooString(std::string("data:image/jpeg;base64,") + gbase64Encode(ims.getBuffer()));
1308 }
1309 pages->addImage(fName, state);
1310 }
1311
drawPngImage(GfxState * state,Stream * str,int width,int height,GfxImageColorMap * colorMap,bool isMask)1312 void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask)
1313 {
1314 #ifdef ENABLE_LIBPNG
1315 FILE *f1;
1316 InMemoryFile ims;
1317
1318 if (!colorMap && !isMask) {
1319 error(errInternal, -1, "Can't have color image without a color map");
1320 return;
1321 }
1322
1323 // open the image file
1324 GooString *fName = createImageFileName("png");
1325 f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
1326 if (!f1) {
1327 error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
1328 delete fName;
1329 return;
1330 }
1331
1332 PNGWriter *writer = new PNGWriter(isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB);
1333 // TODO can we calculate the resolution of the image?
1334 if (!writer->init(f1, width, height, 72, 72)) {
1335 error(errInternal, -1, "Can't init PNG for image '{0:t}'", fName);
1336 delete writer;
1337 fclose(f1);
1338 return;
1339 }
1340
1341 if (!isMask) {
1342 unsigned char *p;
1343 GfxRGB rgb;
1344 png_byte *row = (png_byte *)gmalloc(3 * width); // 3 bytes/pixel: RGB
1345 png_bytep *row_pointer = &row;
1346
1347 // Initialize the image stream
1348 ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits());
1349 imgStr->reset();
1350
1351 // For each line...
1352 for (int y = 0; y < height; y++) {
1353
1354 // Convert into a PNG row
1355 p = imgStr->getLine();
1356 if (!p) {
1357 error(errIO, -1, "Failed to read PNG. '{0:t}' will be incorrect", fName);
1358 delete fName;
1359 gfree(row);
1360 delete writer;
1361 delete imgStr;
1362 fclose(f1);
1363 return;
1364 }
1365 for (int x = 0; x < width; x++) {
1366 colorMap->getRGB(p, &rgb);
1367 // Write the RGB pixels into the row
1368 row[3 * x] = colToByte(rgb.r);
1369 row[3 * x + 1] = colToByte(rgb.g);
1370 row[3 * x + 2] = colToByte(rgb.b);
1371 p += colorMap->getNumPixelComps();
1372 }
1373
1374 if (!writer->writeRow(row_pointer)) {
1375 error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
1376 delete writer;
1377 delete imgStr;
1378 fclose(f1);
1379 return;
1380 }
1381 }
1382 gfree(row);
1383 imgStr->close();
1384 delete imgStr;
1385 } else { // isMask == true
1386 int size = (width + 7) / 8;
1387
1388 // PDF masks use 0 = draw current color, 1 = leave unchanged.
1389 // We invert this to provide the standard interpretation of alpha
1390 // (0 = transparent, 1 = opaque). If the colorMap already inverts
1391 // the mask we leave the data unchanged.
1392 int invert_bits = 0xff;
1393 if (colorMap) {
1394 GfxGray gray;
1395 unsigned char zero[gfxColorMaxComps];
1396 memset(zero, 0, sizeof(zero));
1397 colorMap->getGray(zero, &gray);
1398 if (colToByte(gray) == 0)
1399 invert_bits = 0x00;
1400 }
1401
1402 str->reset();
1403 unsigned char *png_row = (unsigned char *)gmalloc(size);
1404
1405 for (int ri = 0; ri < height; ++ri) {
1406 for (int i = 0; i < size; i++)
1407 png_row[i] = str->getChar() ^ invert_bits;
1408
1409 if (!writer->writeRow(&png_row)) {
1410 error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
1411 delete writer;
1412 fclose(f1);
1413 gfree(png_row);
1414 return;
1415 }
1416 }
1417 str->close();
1418 gfree(png_row);
1419 }
1420
1421 str->close();
1422
1423 writer->close();
1424 delete writer;
1425 fclose(f1);
1426
1427 if (dataUrls) {
1428 delete fName;
1429 fName = new GooString(std::string("data:image/png;base64,") + gbase64Encode(ims.getBuffer()));
1430 }
1431 pages->addImage(fName, state);
1432 #else
1433 return;
1434 #endif
1435 }
1436
createImageFileName(const char * ext)1437 GooString *HtmlOutputDev::createImageFileName(const char *ext)
1438 {
1439 return GooString::format("{0:s}-{1:d}_{2:d}.{3:s}", Docname->c_str(), pageNum, pages->getNumImages() + 1, ext);
1440 }
1441
drawImageMask(GfxState * state,Object * ref,Stream * str,int width,int height,bool invert,bool interpolate,bool inlineImg)1442 void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
1443 {
1444
1445 if (ignore || (complexMode && !xml)) {
1446 OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1447 return;
1448 }
1449
1450 // dump JPEG file
1451 if (dumpJPEG && str->getKind() == strDCT) {
1452 drawJpegImage(state, str);
1453 } else {
1454 #ifdef ENABLE_LIBPNG
1455 drawPngImage(state, str, width, height, nullptr, true);
1456 #else
1457 OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1458 #endif
1459 }
1460 }
1461
drawImage(GfxState * state,Object * ref,Stream * str,int width,int height,GfxImageColorMap * colorMap,bool interpolate,const int * maskColors,bool inlineImg)1462 void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg)
1463 {
1464
1465 if (ignore || (complexMode && !xml)) {
1466 OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
1467 return;
1468 }
1469
1470 /*if( !globalParams->getErrQuiet() )
1471 printf("image stream of kind %d\n", str->getKind());*/
1472 // dump JPEG file
1473 if (dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 || colorMap->getNumPixelComps() == 3) && !inlineImg) {
1474 drawJpegImage(state, str);
1475 } else {
1476 #ifdef ENABLE_LIBPNG
1477 drawPngImage(state, str, width, height, colorMap);
1478 #else
1479 OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
1480 #endif
1481 }
1482 }
1483
doProcessLink(AnnotLink * link)1484 void HtmlOutputDev::doProcessLink(AnnotLink *link)
1485 {
1486 double _x1, _y1, _x2, _y2;
1487 int x1, y1, x2, y2;
1488
1489 link->getRect(&_x1, &_y1, &_x2, &_y2);
1490 cvtUserToDev(_x1, _y1, &x1, &y1);
1491
1492 cvtUserToDev(_x2, _y2, &x2, &y2);
1493
1494 GooString *_dest = getLinkDest(link);
1495 HtmlLink t((double)x1, (double)y2, (double)x2, (double)y1, _dest);
1496 pages->AddLink(t);
1497 delete _dest;
1498 }
1499
getLinkDest(AnnotLink * link)1500 GooString *HtmlOutputDev::getLinkDest(AnnotLink *link)
1501 {
1502 if (!link->getAction())
1503 return new GooString();
1504 switch (link->getAction()->getKind()) {
1505 case actionGoTo: {
1506 int destPage = 1;
1507 LinkGoTo *ha = (LinkGoTo *)link->getAction();
1508 std::unique_ptr<LinkDest> dest;
1509 if (ha->getDest() != nullptr)
1510 dest = std::unique_ptr<LinkDest>(ha->getDest()->copy());
1511 else if (ha->getNamedDest() != nullptr)
1512 dest = catalog->findDest(ha->getNamedDest());
1513
1514 if (dest) {
1515 GooString *file = new GooString(gbasename(Docname->c_str()));
1516
1517 if (dest->isPageRef()) {
1518 const Ref pageref = dest->getPageRef();
1519 destPage = catalog->findPage(pageref);
1520 } else {
1521 destPage = dest->getPageNum();
1522 }
1523
1524 /* complex simple
1525 frames file-4.html files.html#4
1526 noframes file.html#4 file.html#4
1527 */
1528 if (noframes) {
1529 file->append(".html#");
1530 file->append(std::to_string(destPage));
1531 } else {
1532 if (complexMode) {
1533 file->append("-");
1534 file->append(std::to_string(destPage));
1535 file->append(".html");
1536 } else {
1537 file->append("s.html#");
1538 file->append(std::to_string(destPage));
1539 }
1540 }
1541
1542 if (printCommands)
1543 printf(" link to page %d ", destPage);
1544 return file;
1545 } else {
1546 return new GooString();
1547 }
1548 }
1549 case actionGoToR: {
1550 LinkGoToR *ha = (LinkGoToR *)link->getAction();
1551 LinkDest *dest = nullptr;
1552 int destPage = 1;
1553 GooString *file = new GooString();
1554 if (ha->getFileName()) {
1555 delete file;
1556 file = new GooString(ha->getFileName()->c_str());
1557 }
1558 if (ha->getDest() != nullptr)
1559 dest = ha->getDest()->copy();
1560 if (dest && file) {
1561 if (!(dest->isPageRef()))
1562 destPage = dest->getPageNum();
1563 delete dest;
1564
1565 if (printCommands)
1566 printf(" link to page %d ", destPage);
1567 if (printHtml) {
1568 const char *p = file->c_str() + file->getLength() - 4;
1569 if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
1570 file->del(file->getLength() - 4, 4);
1571 file->append(".html");
1572 }
1573 file->append('#');
1574 file->append(std::to_string(destPage));
1575 }
1576 }
1577 if (printCommands && file)
1578 printf("filename %s\n", file->c_str());
1579 return file;
1580 }
1581 case actionURI: {
1582 LinkURI *ha = (LinkURI *)link->getAction();
1583 GooString *file = new GooString(ha->getURI());
1584 // printf("uri : %s\n",file->c_str());
1585 return file;
1586 }
1587 case actionLaunch:
1588 if (printHtml) {
1589 LinkLaunch *ha = (LinkLaunch *)link->getAction();
1590 GooString *file = new GooString(ha->getFileName()->c_str());
1591 const char *p = file->c_str() + file->getLength() - 4;
1592 if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
1593 file->del(file->getLength() - 4, 4);
1594 file->append(".html");
1595 }
1596 if (printCommands)
1597 printf("filename %s", file->c_str());
1598
1599 return file;
1600 }
1601 // fallthrough
1602 default:
1603 return new GooString();
1604 }
1605 }
1606
dumpMetaVars(FILE * file)1607 void HtmlOutputDev::dumpMetaVars(FILE *file)
1608 {
1609 GooString *var;
1610
1611 for (const HtmlMetaVar *t : glMetaVars) {
1612 var = t->toString();
1613 fprintf(file, "%s\n", var->c_str());
1614 delete var;
1615 }
1616 }
1617
dumpDocOutline(PDFDoc * doc)1618 bool HtmlOutputDev::dumpDocOutline(PDFDoc *doc)
1619 {
1620 FILE *output = nullptr;
1621 bool bClose = false;
1622
1623 if (!ok)
1624 return false;
1625
1626 Outline *outline = doc->getOutline();
1627 if (!outline)
1628 return false;
1629
1630 const std::vector<OutlineItem *> *outlines = outline->getItems();
1631 if (!outlines)
1632 return false;
1633
1634 if (!complexMode || xml) {
1635 output = page;
1636 } else if (complexMode && !xml) {
1637 if (noframes) {
1638 output = page;
1639 fputs("<hr/>\n", output);
1640 } else {
1641 GooString *str = Docname->copy();
1642 str->append("-outline.html");
1643 output = fopen(str->c_str(), "w");
1644 delete str;
1645 if (output == nullptr)
1646 return false;
1647 bClose = true;
1648
1649 const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
1650
1651 fprintf(output,
1652 "<html xmlns=\"http://www.w3.org/1999/xhtml\" "
1653 "lang=\"\" xml:lang=\"\">\n"
1654 "<head>\n"
1655 "<title>Document Outline</title>\n"
1656 "<meta http-equiv=\"Content-Type\" content=\"text/html; "
1657 "charset=%s\"/>\n"
1658 "</head>\n<body>\n",
1659 htmlEncoding.c_str());
1660 }
1661 }
1662
1663 if (!xml) {
1664 bool done = newHtmlOutlineLevel(output, outlines);
1665 if (done && !complexMode)
1666 fputs("<hr/>\n", output);
1667
1668 if (bClose) {
1669 fputs("</body>\n</html>\n", output);
1670 fclose(output);
1671 }
1672 } else
1673 newXmlOutlineLevel(output, outlines);
1674
1675 return true;
1676 }
1677
newHtmlOutlineLevel(FILE * output,const std::vector<OutlineItem * > * outlines,int level)1678 bool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level)
1679 {
1680 bool atLeastOne = false;
1681
1682 if (level == 1) {
1683 fputs("<a name=\"outline\"></a>", output);
1684 fputs("<h1>Document Outline</h1>\n", output);
1685 }
1686 fputs("<ul>\n", output);
1687
1688 for (OutlineItem *item : *outlines) {
1689 GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), item->getTitleLength());
1690
1691 GooString *linkName = nullptr;
1692 ;
1693 const int itemPage = getOutlinePageNum(item);
1694 if (itemPage > 0) {
1695 /* complex simple
1696 frames file-4.html files.html#4
1697 noframes file.html#4 file.html#4
1698 */
1699 linkName = new GooString(gbasename(Docname->c_str()));
1700 if (noframes) {
1701 linkName->append(".html#");
1702 linkName->append(std::to_string(itemPage));
1703 } else {
1704 if (complexMode) {
1705 linkName->append("-");
1706 linkName->append(std::to_string(itemPage));
1707 linkName->append(".html");
1708 } else {
1709 linkName->append("s.html#");
1710 linkName->append(std::to_string(itemPage));
1711 }
1712 }
1713 }
1714
1715 fputs("<li>", output);
1716 if (linkName)
1717 fprintf(output, "<a href=\"%s\">", linkName->c_str());
1718 fputs(titleStr->c_str(), output);
1719 if (linkName) {
1720 fputs("</a>", output);
1721 delete linkName;
1722 }
1723 delete titleStr;
1724 atLeastOne = true;
1725
1726 item->open();
1727 if (item->hasKids() && item->getKids()) {
1728 fputs("\n", output);
1729 newHtmlOutlineLevel(output, item->getKids(), level + 1);
1730 }
1731 fputs("</li>\n", output);
1732 }
1733 fputs("</ul>\n", output);
1734
1735 return atLeastOne;
1736 }
1737
newXmlOutlineLevel(FILE * output,const std::vector<OutlineItem * > * outlines)1738 void HtmlOutputDev::newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines)
1739 {
1740 fputs("<outline>\n", output);
1741
1742 for (OutlineItem *item : *outlines) {
1743 GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), item->getTitleLength());
1744 const int itemPage = getOutlinePageNum(item);
1745 if (itemPage > 0) {
1746 fprintf(output, "<item page=\"%d\">%s</item>\n", itemPage, titleStr->c_str());
1747 } else {
1748 fprintf(output, "<item>%s</item>\n", titleStr->c_str());
1749 }
1750 delete titleStr;
1751
1752 item->open();
1753 if (item->hasKids() && item->getKids()) {
1754 newXmlOutlineLevel(output, item->getKids());
1755 }
1756 }
1757
1758 fputs("</outline>\n", output);
1759 }
1760
getOutlinePageNum(OutlineItem * item)1761 int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
1762 {
1763 const LinkAction *action = item->getAction();
1764 const LinkGoTo *link = nullptr;
1765 std::unique_ptr<LinkDest> linkdest;
1766 int pagenum = -1;
1767
1768 if (!action || action->getKind() != actionGoTo)
1769 return pagenum;
1770
1771 link = static_cast<const LinkGoTo *>(action);
1772
1773 if (!link || !link->isOk())
1774 return pagenum;
1775
1776 if (link->getDest())
1777 linkdest = std::unique_ptr<LinkDest>(link->getDest()->copy());
1778 else if (link->getNamedDest())
1779 linkdest = catalog->findDest(link->getNamedDest());
1780
1781 if (!linkdest)
1782 return pagenum;
1783
1784 if (linkdest->isPageRef()) {
1785 const Ref pageref = linkdest->getPageRef();
1786 pagenum = catalog->findPage(pageref);
1787 } else {
1788 pagenum = linkdest->getPageNum();
1789 }
1790
1791 return pagenum;
1792 }
1793