1 //======================================================================== 2 // 3 // HtmlOutputDev.h 4 // 5 // Copyright 1997 Derek B. Noonburg 6 // 7 // Changed 1999 by G.Ovtcharov 8 //======================================================================== 9 10 //======================================================================== 11 // 12 // Modified under the Poppler project - http://poppler.freedesktop.org 13 // 14 // All changes made under the Poppler project to this file are licensed 15 // under GPL version 2 or later 16 // 17 // Copyright (C) 2006, 2007, 2009 Albert Astals Cid <aacid@kde.org> 18 // Copyright (C) 2008-2009 Warren Toomey <wkt@tuhs.org> 19 // Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org> 20 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net> 21 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> 22 // 23 // To see a description of the changes please see the Changelog file that 24 // came with your tarball or type make ChangeLog if you are building from git 25 // 26 //======================================================================== 27 28 #ifndef HTMLOUTPUTDEV_H 29 #define HTMLOUTPUTDEV_H 30 31 #ifdef __GNUC__ 32 #pragma interface 33 #endif 34 35 #include <stdio.h> 36 #include "goo/gtypes.h" 37 #include "goo/GooList.h" 38 #include "GfxFont.h" 39 #include "OutputDev.h" 40 #include "HtmlLinks.h" 41 #include "HtmlFonts.h" 42 #include "Link.h" 43 #include "Catalog.h" 44 #include "UnicodeMap.h" 45 46 47 #ifdef _WIN32 48 # define SLASH '\\' 49 #else 50 # define SLASH '/' 51 #endif 52 53 #define xoutRound(x) ((int)(x + 0.5)) 54 55 #define DOCTYPE "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">" 56 #define DOCTYPE_FRAMES "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\"\n\"http://www.w3.org/TR/html4/frameset.dtd\">" 57 58 class GfxState; 59 class GooString; 60 //------------------------------------------------------------------------ 61 // HtmlString 62 //------------------------------------------------------------------------ 63 64 enum UnicodeTextDirection { 65 textDirUnknown, 66 textDirLeftRight, 67 textDirRightLeft, 68 textDirTopBottom 69 }; 70 71 72 class HtmlString { 73 public: 74 75 // Constructor. 76 HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts); 77 78 // Destructor. 79 ~HtmlString(); 80 81 // Add a character to the string. 82 void addChar(GfxState *state, double x, double y, 83 double dx, double dy, 84 Unicode u); getLink()85 HtmlLink* getLink() { return link; } 86 void endString(); // postprocessing 87 88 private: 89 // aender die text variable 90 HtmlLink *link; 91 double xMin, xMax; // bounding box x coordinates 92 double yMin, yMax; // bounding box y coordinates 93 int col; // starting column 94 Unicode *text; // the text 95 double *xRight; // right-hand x coord of each char 96 HtmlString *yxNext; // next string in y-major order 97 HtmlString *xyNext; // next string in x-major order 98 int fontpos; 99 GooString* htext; 100 int len; // length of text and xRight 101 int size; // size of text and xRight arrays 102 UnicodeTextDirection dir; // direction (left to right/right to left) 103 104 friend class HtmlPage; 105 106 }; 107 108 109 //------------------------------------------------------------------------ 110 // HtmlPage 111 //------------------------------------------------------------------------ 112 113 114 115 class HtmlPage { 116 public: 117 118 // Constructor. 119 HtmlPage(GBool rawOrder, char *imgExtVal); 120 121 // Destructor. 122 ~HtmlPage(); 123 124 // Begin a new string. 125 void beginString(GfxState *state, GooString *s); 126 127 // Add a character to the current string. 128 void addChar(GfxState *state, double x, double y, 129 double dx, double dy, 130 double ox, double oy, 131 Unicode *u, int uLen); //Guchar c); 132 133 void updateFont(GfxState *state); 134 135 // End the current string, sorting it into the list of strings. 136 void endString(); 137 138 // Coalesce strings that look like parts of the same line. 139 void coalesce(); 140 141 // Find a string. If <top> is true, starts looking at top of page; 142 // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true, 143 // stops looking at bottom of page; otherwise stops looking at 144 // <xMax>,<yMax>. If found, sets the text bounding rectange and 145 // returns true; otherwise returns false. 146 147 148 // new functions AddLink(const HtmlLink & x)149 void AddLink(const HtmlLink& x){ 150 links->AddLink(x); 151 } 152 153 void dump(FILE *f, int pageNum); 154 155 // Clear the page. 156 void clear(); 157 158 void conv(); 159 private: getFont(HtmlString * hStr)160 HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); } 161 162 double fontSize; // current font size 163 GBool rawOrder; // keep strings in content stream order 164 165 HtmlString *curStr; // currently active string 166 167 HtmlString *yxStrings; // strings in y-major order 168 HtmlString *xyStrings; // strings in x-major order 169 HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list 170 171 void setDocName(char* fname); 172 void dumpAsXML(FILE* f,int page); 173 void dumpComplex(FILE* f, int page); 174 175 // marks the position of the fonts that belong to current page (for noframes) 176 int fontsPageMarker; 177 HtmlFontAccu *fonts; 178 HtmlLinks *links; 179 180 GooString *DocName; 181 GooString *imgExt; 182 int pageWidth; 183 int pageHeight; 184 static int pgNum; 185 int firstPage; // used to begin the numeration of pages 186 187 friend class HtmlOutputDev; 188 }; 189 190 //------------------------------------------------------------------------ 191 // HtmlMetaVar 192 //------------------------------------------------------------------------ 193 class HtmlMetaVar { 194 public: 195 HtmlMetaVar(char *_name, char *_content); 196 ~HtmlMetaVar(); 197 198 GooString* toString(); 199 200 private: 201 202 GooString *name; 203 GooString *content; 204 }; 205 206 //------------------------------------------------------------------------ 207 // HtmlOutputDev 208 //------------------------------------------------------------------------ 209 210 class HtmlOutputDev: public OutputDev { 211 public: 212 213 // Open a text output file. If <fileName> is NULL, no file is written 214 // (this is useful, e.g., for searching text). If <useASCII7> is true, 215 // text is converted to 7-bit ASCII; otherwise, text is converted to 216 // 8-bit ISO Latin-1. <useASCII7> should also be set for Japanese 217 // (EUC-JP) text. If <rawOrder> is true, the text is kept in content 218 // stream order. 219 HtmlOutputDev(char *fileName, char *title, 220 char *author, 221 char *keywords, 222 char *subject, 223 char *date, 224 char *extension, 225 GBool rawOrder, 226 int firstPage = 1, 227 GBool outline = 0); 228 229 // Destructor. 230 virtual ~HtmlOutputDev(); 231 232 // Check if file was successfully created. isOk()233 virtual GBool isOk() { return ok; } 234 235 //---- get info about output device 236 237 // Does this device use upside-down coordinates? 238 // (Upside-down means (0,0) is the top left corner of the page.) upsideDown()239 virtual GBool upsideDown() { return gTrue; } 240 241 // Does this device use drawChar() or drawString()? useDrawChar()242 virtual GBool useDrawChar() { return gTrue; } 243 244 // Does this device use beginType3Char/endType3Char? Otherwise, 245 // text in Type 3 fonts will be drawn with drawChar/drawString. interpretType3Chars()246 virtual GBool interpretType3Chars() { return gFalse; } 247 248 // Does this device need non-text content? needNonText()249 virtual GBool needNonText() { return gTrue; } 250 251 //----- initialization and control 252 253 virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI, 254 int rotate, GBool useMediaBox, GBool crop, 255 int sliceX, int sliceY, int sliceW, int sliceH, 256 GBool printing, Catalog * catalogA, 257 GBool (* abortCheckCbk)(void *data) = NULL, 258 void * abortCheckCbkData = NULL) 259 { 260 docPage = page; 261 catalog = catalogA; 262 return gTrue; 263 } 264 265 266 // Start a page. 267 virtual void startPage(int pageNum, GfxState *state); 268 269 // End a page. 270 virtual void endPage(); 271 272 //----- update text state 273 virtual void updateFont(GfxState *state); 274 275 //----- text drawing 276 virtual void beginString(GfxState *state, GooString *s); 277 virtual void endString(GfxState *state); 278 virtual void drawChar(GfxState *state, double x, double y, 279 double dx, double dy, 280 double originX, double originY, 281 CharCode code, int nBytes, Unicode *u, int uLen); 282 283 virtual void drawImageMask(GfxState *state, Object *ref, 284 Stream *str, 285 int width, int height, GBool invert, 286 GBool interpolate, GBool inlineImg); 287 virtual void drawImage(GfxState *state, Object *ref, Stream *str, 288 int width, int height, GfxImageColorMap *colorMap, 289 GBool interpolate, int *maskColors, GBool inlineImg); 290 291 //new feature DevType()292 virtual int DevType() {return 1234;} 293 getPageWidth()294 int getPageWidth() { return maxPageWidth; } getPageHeight()295 int getPageHeight() { return maxPageHeight; } 296 297 GBool dumpDocOutline(Catalog* catalog); 298 299 private: 300 // convert encoding into a HTML standard, or encoding->getCString if not 301 // recognized 302 static char* mapEncodingToHtml(GooString* encoding); 303 void doProcessLink(Link *link); 304 GooString* getLinkDest(Link *link,Catalog *catalog); 305 void dumpMetaVars(FILE *); 306 void doFrame(int firstPage); 307 GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1); 308 309 FILE *fContentsFrame; 310 FILE *page; // html file 311 //FILE *tin; // image log file 312 //GBool write; 313 GBool needClose; // need to close the file? 314 HtmlPage *pages; // text for the current page 315 GBool rawOrder; // keep text in content stream order 316 GBool doOutline; // output document outline 317 GBool ok; // set up ok? 318 GBool dumpJPEG; 319 int pageNum; 320 int maxPageWidth; 321 int maxPageHeight; 322 static int imgNum; 323 static GooList *imgList; 324 GooString *Docname; 325 GooString *docTitle; 326 GooList *glMetaVars; 327 Catalog *catalog; 328 Page *docPage; 329 friend class HtmlPage; 330 }; 331 332 #endif 333