1 //========================================================================
2 //
3 // HtmlOutputDev.h
4 //
5 // Copyright 1997 Derek B. Noonburg
6 //
7 // Changed 1999 by G.Ovtcharov
8 //========================================================================
9 
10 //========================================================================
11 //
12 // Modified under the Poppler project - http://poppler.freedesktop.org
13 //
14 // All changes made under the Poppler project to this file are licensed
15 // under GPL version 2 or later
16 //
17 // Copyright (C) 2006, 2007, 2009 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008-2009 Warren Toomey <wkt@tuhs.org>
19 // Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
20 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
21 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
22 //
23 // To see a description of the changes please see the Changelog file that
24 // came with your tarball or type make ChangeLog if you are building from git
25 //
26 //========================================================================
27 
28 #ifndef HTMLOUTPUTDEV_H
29 #define HTMLOUTPUTDEV_H
30 
31 #ifdef __GNUC__
32 #pragma interface
33 #endif
34 
35 #include <stdio.h>
36 #include "goo/gtypes.h"
37 #include "goo/GooList.h"
38 #include "GfxFont.h"
39 #include "OutputDev.h"
40 #include "HtmlLinks.h"
41 #include "HtmlFonts.h"
42 #include "Link.h"
43 #include "Catalog.h"
44 #include "UnicodeMap.h"
45 
46 
// SLASH is '\\' when _WIN32 is defined and '/' otherwise
// (presumably the platform's path separator -- confirm against callers).
#ifndef _WIN32
#  define SLASH '/'
#else
#  define SLASH '\\'
#endif
52 
// Round <x> to an int by adding 0.5 and truncating.  The argument is
// parenthesized so that an expression containing operators of lower
// precedence than '+' (for example ?:) is rounded as a whole rather than
// having 0.5 added to only part of it.
// Note: the cast truncates toward zero, so negative values are not rounded
// to nearest (e.g. -1.6 yields -1).
#define xoutRound(x) ((int)((x) + 0.5))
54 
// HTML 4.01 document type declarations: Transitional, and Frameset
// (followed by a newline and the frameset DTD URL).  Adjacent string
// literals are concatenated by the compiler, so each macro still expands
// to one contiguous string.
#define DOCTYPE \
  "<!DOCTYPE HTML PUBLIC " \
  "\"-//W3C//DTD HTML 4.01 Transitional//EN\">"
#define DOCTYPE_FRAMES \
  "<!DOCTYPE HTML PUBLIC " \
  "\"-//W3C//DTD HTML 4.01 Frameset//EN\"\n" \
  "\"http://www.w3.org/TR/html4/frameset.dtd\">"
57 
// Forward declarations: this header only uses pointers to these types.
class GooString;
class GfxState;
60 //------------------------------------------------------------------------
61 // HtmlString
62 //------------------------------------------------------------------------
63 
// Direction of a piece of text.  The explicit values are the same ones
// the compiler would assign implicitly.
enum UnicodeTextDirection {
  textDirUnknown   = 0,
  textDirLeftRight = 1,   // left to right
  textDirRightLeft = 2,   // right to left
  textDirTopBottom = 3    // top to bottom
};
70 
71 
72 class HtmlString {
73 public:
74 
75   // Constructor.
76   HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts);
77 
78   // Destructor.
79   ~HtmlString();
80 
81   // Add a character to the string.
82   void addChar(GfxState *state, double x, double y,
83 	       double dx, double dy,
84 	       Unicode u);
getLink()85   HtmlLink* getLink() { return link; }
86   void endString(); // postprocessing
87 
88 private:
89 // aender die text variable
90   HtmlLink *link;
91   double xMin, xMax;		// bounding box x coordinates
92   double yMin, yMax;		// bounding box y coordinates
93   int col;			// starting column
94   Unicode *text;		// the text
95   double *xRight;		// right-hand x coord of each char
96   HtmlString *yxNext;		// next string in y-major order
97   HtmlString *xyNext;		// next string in x-major order
98   int fontpos;
99   GooString* htext;
100   int len;			// length of text and xRight
101   int size;			// size of text and xRight arrays
102   UnicodeTextDirection dir;	// direction (left to right/right to left)
103 
104   friend class HtmlPage;
105 
106 };
107 
108 
109 //------------------------------------------------------------------------
110 // HtmlPage
111 //------------------------------------------------------------------------
112 
113 
114 
115 class HtmlPage {
116 public:
117 
118   // Constructor.
119   HtmlPage(GBool rawOrder, char *imgExtVal);
120 
121   // Destructor.
122   ~HtmlPage();
123 
124   // Begin a new string.
125   void beginString(GfxState *state, GooString *s);
126 
127   // Add a character to the current string.
128   void addChar(GfxState *state, double x, double y,
129 	       double dx, double dy,
130 		double ox, double oy,
131 		Unicode *u, int uLen); //Guchar c);
132 
133   void updateFont(GfxState *state);
134 
135   // End the current string, sorting it into the list of strings.
136   void endString();
137 
138   // Coalesce strings that look like parts of the same line.
139   void coalesce();
140 
141   // Find a string.  If <top> is true, starts looking at top of page;
142   // otherwise starts looking at <xMin>,<yMin>.  If <bottom> is true,
143   // stops looking at bottom of page; otherwise stops looking at
144   // <xMax>,<yMax>.  If found, sets the text bounding rectange and
145   // returns true; otherwise returns false.
146 
147 
148   // new functions
AddLink(const HtmlLink & x)149   void AddLink(const HtmlLink& x){
150     links->AddLink(x);
151   }
152 
153  void dump(FILE *f, int pageNum);
154 
155   // Clear the page.
156   void clear();
157 
158   void conv();
159 private:
getFont(HtmlString * hStr)160   HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); }
161 
162   double fontSize;		// current font size
163   GBool rawOrder;		// keep strings in content stream order
164 
165   HtmlString *curStr;		// currently active string
166 
167   HtmlString *yxStrings;	// strings in y-major order
168   HtmlString *xyStrings;	// strings in x-major order
169   HtmlString *yxCur1, *yxCur2;	// cursors for yxStrings list
170 
171   void setDocName(char* fname);
172   void dumpAsXML(FILE* f,int page);
173   void dumpComplex(FILE* f, int page);
174 
175   // marks the position of the fonts that belong to current page (for noframes)
176   int fontsPageMarker;
177   HtmlFontAccu *fonts;
178   HtmlLinks *links;
179 
180   GooString *DocName;
181   GooString *imgExt;
182   int pageWidth;
183   int pageHeight;
184   static int pgNum;
185   int firstPage;                // used to begin the numeration of pages
186 
187   friend class HtmlOutputDev;
188 };
189 
190 //------------------------------------------------------------------------
191 // HtmlMetaVar
192 //------------------------------------------------------------------------
193 class HtmlMetaVar {
194 public:
195     HtmlMetaVar(char *_name, char *_content);
196     ~HtmlMetaVar();
197 
198     GooString* toString();
199 
200 private:
201 
202     GooString *name;
203     GooString *content;
204 };
205 
206 //------------------------------------------------------------------------
207 // HtmlOutputDev
208 //------------------------------------------------------------------------
209 
210 class HtmlOutputDev: public OutputDev {
211 public:
212 
213   // Open a text output file.  If <fileName> is NULL, no file is written
214   // (this is useful, e.g., for searching text).  If <useASCII7> is true,
215   // text is converted to 7-bit ASCII; otherwise, text is converted to
216   // 8-bit ISO Latin-1.  <useASCII7> should also be set for Japanese
217   // (EUC-JP) text.  If <rawOrder> is true, the text is kept in content
218   // stream order.
219   HtmlOutputDev(char *fileName, char *title,
220 	  char *author,
221 	  char *keywords,
222 	  char *subject,
223 	  char *date,
224 	  char *extension,
225 	  GBool rawOrder,
226 	  int firstPage = 1,
227 	  GBool outline = 0);
228 
229   // Destructor.
230   virtual ~HtmlOutputDev();
231 
232   // Check if file was successfully created.
isOk()233   virtual GBool isOk() { return ok; }
234 
235   //---- get info about output device
236 
237   // Does this device use upside-down coordinates?
238   // (Upside-down means (0,0) is the top left corner of the page.)
upsideDown()239   virtual GBool upsideDown() { return gTrue; }
240 
241   // Does this device use drawChar() or drawString()?
useDrawChar()242   virtual GBool useDrawChar() { return gTrue; }
243 
244   // Does this device use beginType3Char/endType3Char?  Otherwise,
245   // text in Type 3 fonts will be drawn with drawChar/drawString.
interpretType3Chars()246   virtual GBool interpretType3Chars() { return gFalse; }
247 
248   // Does this device need non-text content?
needNonText()249   virtual GBool needNonText() { return gTrue; }
250 
251   //----- initialization and control
252 
253   virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI,
254                                int rotate, GBool useMediaBox, GBool crop,
255                                int sliceX, int sliceY, int sliceW, int sliceH,
256                                GBool printing, Catalog * catalogA,
257                                GBool (* abortCheckCbk)(void *data) = NULL,
258                                void * abortCheckCbkData = NULL)
259   {
260    docPage = page;
261    catalog = catalogA;
262    return gTrue;
263   }
264 
265 
266   // Start a page.
267   virtual void startPage(int pageNum, GfxState *state);
268 
269   // End a page.
270   virtual void endPage();
271 
272   //----- update text state
273   virtual void updateFont(GfxState *state);
274 
275   //----- text drawing
276   virtual void beginString(GfxState *state, GooString *s);
277   virtual void endString(GfxState *state);
278   virtual void drawChar(GfxState *state, double x, double y,
279 			double dx, double dy,
280 			double originX, double originY,
281 			CharCode code, int nBytes, Unicode *u, int uLen);
282 
283   virtual void drawImageMask(GfxState *state, Object *ref,
284 			     Stream *str,
285 			     int width, int height, GBool invert,
286 			     GBool interpolate, GBool inlineImg);
287   virtual void drawImage(GfxState *state, Object *ref, Stream *str,
288 			 int width, int height, GfxImageColorMap *colorMap,
289 			 GBool interpolate, int *maskColors, GBool inlineImg);
290 
291   //new feature
DevType()292   virtual int DevType() {return 1234;}
293 
getPageWidth()294   int getPageWidth() { return maxPageWidth; }
getPageHeight()295   int getPageHeight() { return maxPageHeight; }
296 
297   GBool dumpDocOutline(Catalog* catalog);
298 
299 private:
300   // convert encoding into a HTML standard, or encoding->getCString if not
301   // recognized
302   static char* mapEncodingToHtml(GooString* encoding);
303   void doProcessLink(Link *link);
304   GooString* getLinkDest(Link *link,Catalog *catalog);
305   void dumpMetaVars(FILE *);
306   void doFrame(int firstPage);
307   GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1);
308 
309   FILE *fContentsFrame;
310   FILE *page;                   // html file
311   //FILE *tin;                    // image log file
312   //GBool write;
313   GBool needClose;		// need to close the file?
314   HtmlPage *pages;		// text for the current page
315   GBool rawOrder;		// keep text in content stream order
316   GBool doOutline;		// output document outline
317   GBool ok;			// set up ok?
318   GBool dumpJPEG;
319   int pageNum;
320   int maxPageWidth;
321   int maxPageHeight;
322   static int imgNum;
323   static GooList *imgList;
324   GooString *Docname;
325   GooString *docTitle;
326   GooList *glMetaVars;
327   Catalog *catalog;
328   Page *docPage;
329   friend class HtmlPage;
330 };
331 
332 #endif
333