1 //======================================================================== 2 // 3 // TextOutputDev.h 4 // 5 // Copyright 1997-2003 Glyph & Cog, LLC 6 // 7 //======================================================================== 8 9 //======================================================================== 10 // 11 // Modified under the Poppler project - http://poppler.freedesktop.org 12 // 13 // Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com> 14 // Copyright (C) 2006 Ed Catmur <ed@catmur.co.uk> 15 // Copyright (C) 2007-2008 Carlos Garcia Campos <carlosgc@gnome.org> 16 // Copyright (C) 2007 Adrian Johnson <ajohnson@redneon.com> 17 // Copyright (C) 2008, 2010 Albert Astals Cid <aacid@kde.org> 18 // Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com> 19 // 20 // To see a description of the changes please see the Changelog file that 21 // came with your tarball or type make ChangeLog if you are building from git 22 // 23 //======================================================================== 24 25 #ifndef TEXTOUTPUTDEV_H 26 #define TEXTOUTPUTDEV_H 27 28 #ifdef USE_GCC_PRAGMAS 29 #pragma interface 30 #endif 31 32 #include "poppler-config.h" 33 #include <stdio.h> 34 #include "goo/gtypes.h" 35 #include "GfxFont.h" 36 #include "GfxState.h" 37 #include "OutputDev.h" 38 39 class GooString; 40 class GooList; 41 class Gfx; 42 class GfxFont; 43 class GfxState; 44 class UnicodeMap; 45 class Link; 46 47 class TextWord; 48 class TextPool; 49 class TextLine; 50 class TextLineFrag; 51 class TextBlock; 52 class TextFlow; 53 class TextWordList; 54 class TextPage; 55 class TextSelectionVisitor; 56 57 //------------------------------------------------------------------------ 58 59 typedef void (*TextOutputFunc)(void *stream, char *text, int len); 60 61 enum SelectionStyle { 62 selectionStyleGlyph, 63 selectionStyleWord, 64 selectionStyleLine 65 }; 66 67 //------------------------------------------------------------------------ 68 // TextFontInfo 69 //------------------------------------------------------------------------ 70 71 class TextFontInfo { 72 public: 73 74 TextFontInfo(GfxState *state); 75 ~TextFontInfo(); 76 77 GBool matches(GfxState *state); 78 79 #if TEXTOUT_WORD_LIST 80 // Get the font name (which may be NULL). getFontName()81 GooString *getFontName() { return fontName; } 82 83 // Get font descriptor flags. isFixedWidth()84 GBool isFixedWidth() { return flags & fontFixedWidth; } isSerif()85 GBool isSerif() { return flags & fontSerif; } isSymbolic()86 GBool isSymbolic() { return flags & fontSymbolic; } isItalic()87 GBool isItalic() { return flags & fontItalic; } isBold()88 GBool isBold() { return flags & fontBold; } 89 #endif 90 91 private: 92 93 GfxFont *gfxFont; 94 #if TEXTOUT_WORD_LIST 95 GooString *fontName; 96 int flags; 97 #endif 98 99 friend class TextWord; 100 friend class TextPage; 101 friend class TextSelectionPainter; 102 }; 103 104 //------------------------------------------------------------------------ 105 // TextWord 106 //------------------------------------------------------------------------ 107 108 class TextWord { 109 public: 110 111 // Constructor. 112 TextWord(GfxState *state, int rotA, double x0, double y0, 113 int charPosA, TextFontInfo *fontA, double fontSize); 114 115 // Destructor. 116 ~TextWord(); 117 118 // Add a character to the word. 119 void addChar(GfxState *state, double x, double y, 120 double dx, double dy, CharCode c, Unicode u); 121 122 // Merge <word> onto the end of <this>. 123 void merge(TextWord *word); 124 125 // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>), 126 // based on a primary-axis comparison, e.g., x ordering if rot=0. 127 int primaryCmp(TextWord *word); 128 129 // Return the distance along the primary axis between <this> and 130 // <word>. 131 double primaryDelta(TextWord *word); 132 133 static int cmpYX(const void *p1, const void *p2); 134 135 void visitSelection(TextSelectionVisitor *visitor, 136 PDFRectangle *selection, 137 SelectionStyle style); 138 139 // Get the TextFontInfo object associated with this word. getFontInfo()140 TextFontInfo *getFontInfo() { return font; } 141 142 // Get the next TextWord on the linked list. getNext()143 TextWord *getNext() { return next; } 144 145 #if TEXTOUT_WORD_LIST getLength()146 int getLength() { return len; } getChar(int idx)147 const Unicode *getChar(int idx) { return &text[idx]; } 148 GooString *getText(); getFontName()149 GooString *getFontName() { return font->fontName; } getColor(double * r,double * g,double * b)150 void getColor(double *r, double *g, double *b) 151 { *r = colorR; *g = colorG; *b = colorB; } getBBox(double * xMinA,double * yMinA,double * xMaxA,double * yMaxA)152 void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) 153 { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; } 154 void getCharBBox(int charIdx, double *xMinA, double *yMinA, 155 double *xMaxA, double *yMaxA); getFontSize()156 double getFontSize() { return fontSize; } getRotation()157 int getRotation() { return rot; } getCharPos()158 int getCharPos() { return charPos; } getCharLen()159 int getCharLen() { return charLen; } getSpaceAfter()160 GBool getSpaceAfter() { return spaceAfter; } 161 #endif isUnderlined()162 GBool isUnderlined() { return underlined; } getLink()163 Link *getLink() { return link; } getEdge(int i)164 double getEdge(int i) { return edge[i]; } getBaseline()165 double getBaseline () { return base; } hasSpaceAfter()166 GBool hasSpaceAfter () { return spaceAfter; } nextWord()167 TextWord* nextWord () { return next; }; 168 private: 169 170 int rot; // rotation, multiple of 90 degrees 171 // (0, 1, 2, or 3) 172 double xMin, xMax; // bounding box x coordinates 173 double yMin, yMax; // bounding box y coordinates 174 double base; // baseline x or y coordinate 175 Unicode *text; // the text 176 CharCode *charcode; // glyph indices 177 double *edge; // "near" edge x or y coord of each char 178 // (plus one extra entry for the last char) 179 int len; // length of text and edge arrays 180 int size; // size of text and edge arrays 181 int charPos; // character position (within content stream) 182 int charLen; // number of content stream characters in 183 // this word 184 TextFontInfo *font; // font information 185 double fontSize; // font size 186 GBool spaceAfter; // set if there is a space between this 187 // word and the next word on the line 188 TextWord *next; // next word in line 189 190 #if TEXTOUT_WORD_LIST 191 double colorR, // word color 192 colorG, 193 colorB; 194 #endif 195 196 GBool underlined; 197 Link *link; 198 199 friend class TextPool; 200 friend class TextLine; 201 friend class TextBlock; 202 friend class TextFlow; 203 friend class TextWordList; 204 friend class TextPage; 205 206 friend class TextSelectionPainter; 207 friend class TextSelectionDumper; 208 }; 209 210 //------------------------------------------------------------------------ 211 // TextPool 212 //------------------------------------------------------------------------ 213 214 class TextPool { 215 public: 216 217 TextPool(); 218 ~TextPool(); 219 getPool(int baseIdx)220 TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; } setPool(int baseIdx,TextWord * p)221 void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; } 222 223 int getBaseIdx(double base); 224 225 void addWord(TextWord *word); 226 227 private: 228 229 int minBaseIdx; // min baseline bucket index 230 int maxBaseIdx; // max baseline bucket index 231 TextWord **pool; // array of linked lists, one for each 232 // baseline value (multiple of 4 pts) 233 TextWord *cursor; // pointer to last-accessed word 234 int cursorBaseIdx; // baseline bucket index of last-accessed word 235 236 friend class TextBlock; 237 friend class TextPage; 238 }; 239 240 struct TextFlowData; 241 242 //------------------------------------------------------------------------ 243 // TextLine 244 //------------------------------------------------------------------------ 245 246 class TextLine { 247 public: 248 249 TextLine(TextBlock *blkA, int rotA, double baseA); 250 ~TextLine(); 251 252 void addWord(TextWord *word); 253 254 // Return the distance along the primary axis between <this> and 255 // <line>. 256 double primaryDelta(TextLine *line); 257 258 // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>), 259 // based on a primary-axis comparison, e.g., x ordering if rot=0. 260 int primaryCmp(TextLine *line); 261 262 // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>), 263 // based on a secondary-axis comparison of the baselines, e.g., y 264 // ordering if rot=0. 265 int secondaryCmp(TextLine *line); 266 267 int cmpYX(TextLine *line); 268 269 static int cmpXY(const void *p1, const void *p2); 270 271 void coalesce(UnicodeMap *uMap); 272 273 void visitSelection(TextSelectionVisitor *visitor, 274 PDFRectangle *selection, 275 SelectionStyle style); 276 277 // Get the head of the linked list of TextWords. getWords()278 TextWord *getWords() { return words; } 279 280 // Get the next TextLine on the linked list. getNext()281 TextLine *getNext() { return next; } 282 283 // Returns true if the last char of the line is a hyphen. isHyphenated()284 GBool isHyphenated() { return hyphenated; } 285 286 private: 287 288 TextBlock *blk; // parent block 289 int rot; // text rotation 290 double xMin, xMax; // bounding box x coordinates 291 double yMin, yMax; // bounding box y coordinates 292 double base; // baseline x or y coordinate 293 TextWord *words; // words in this line 294 TextWord *lastWord; // last word in this line 295 Unicode *text; // Unicode text of the line, including 296 // spaces between words 297 double *edge; // "near" edge x or y coord of each char 298 // (plus one extra entry for the last char) 299 int *col; // starting column number of each Unicode char 300 int len; // number of Unicode chars 301 int convertedLen; // total number of converted characters 302 GBool hyphenated; // set if last char is a hyphen 303 TextLine *next; // next line in block 304 Unicode *normalized; // normalized form of Unicode text 305 int normalized_len; // number of normalized Unicode chars 306 int *normalized_idx; // indices of normalized chars into Unicode text 307 308 friend class TextLineFrag; 309 friend class TextBlock; 310 friend class TextFlow; 311 friend class TextWordList; 312 friend class TextPage; 313 314 friend class TextSelectionPainter; 315 friend class TextSelectionSizer; 316 friend class TextSelectionDumper; 317 }; 318 319 //------------------------------------------------------------------------ 320 // TextBlock 321 //------------------------------------------------------------------------ 322 323 class TextBlock { 324 public: 325 326 TextBlock(TextPage *pageA, int rotA); 327 ~TextBlock(); 328 329 void addWord(TextWord *word); 330 331 void coalesce(UnicodeMap *uMap); 332 333 // Update this block's priMin and priMax values, looking at <blk>. 334 void updatePriMinMax(TextBlock *blk); 335 336 static int cmpXYPrimaryRot(const void *p1, const void *p2); 337 338 static int cmpYXPrimaryRot(const void *p1, const void *p2); 339 340 int primaryCmp(TextBlock *blk); 341 342 double secondaryDelta(TextBlock *blk); 343 344 // Returns true if <this> is below <blk>, relative to the page's 345 // primary rotation. 346 GBool isBelow(TextBlock *blk); 347 348 void visitSelection(TextSelectionVisitor *visitor, 349 PDFRectangle *selection, 350 SelectionStyle style); 351 352 // Get the head of the linked list of TextLines. getLines()353 TextLine *getLines() { return lines; } 354 355 // Get the next TextBlock on the linked list. getNext()356 TextBlock *getNext() { return next; } 357 getBBox(double * xMinA,double * yMinA,double * xMaxA,double * yMaxA)358 void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) 359 { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; } 360 getLineCount()361 int getLineCount() { return nLines; } 362 363 private: 364 365 GBool isBeforeByRule1(TextBlock *blk1); 366 GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1); 367 GBool isBeforeByRule2(TextBlock *blk1); 368 369 int visitDepthFirst(TextBlock *blkList, int pos1, 370 TextBlock **sorted, int sortPos, 371 GBool* visited); 372 373 TextPage *page; // the parent page 374 int rot; // text rotation 375 double xMin, xMax; // bounding box x coordinates 376 double yMin, yMax; // bounding box y coordinates 377 double priMin, priMax; // whitespace bounding box along primary axis 378 double ExMin, ExMax; // extended bounding box x coordinates 379 double EyMin, EyMax; // extended bounding box y coordinates 380 int tableId; // id of table to which this block belongs 381 GBool tableEnd; // is this block at end of line of actual table 382 383 TextPool *pool; // pool of words (used only until lines 384 // are built) 385 TextLine *lines; // linked list of lines 386 TextLine *curLine; // most recently added line 387 int nLines; // number of lines 388 int charCount; // number of characters in the block 389 int col; // starting column 390 int nColumns; // number of columns in the block 391 392 TextBlock *next; 393 TextBlock *stackNext; 394 395 friend class TextLine; 396 friend class TextLineFrag; 397 friend class TextFlow; 398 friend class TextWordList; 399 friend class TextPage; 400 friend class TextSelectionPainter; 401 friend class TextSelectionDumper; 402 }; 403 404 //------------------------------------------------------------------------ 405 // TextFlow 406 //------------------------------------------------------------------------ 407 408 class TextFlow { 409 public: 410 411 TextFlow(TextPage *pageA, TextBlock *blk); 412 ~TextFlow(); 413 414 // Add a block to the end of this flow. 415 void addBlock(TextBlock *blk); 416 417 // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1) 418 // it uses a font no larger than the last block added to the flow, 419 // and (2) it fits within the flow's [priMin, priMax] along the 420 // primary axis. 421 GBool blockFits(TextBlock *blk, TextBlock *prevBlk); 422 423 // Get the head of the linked list of TextBlocks. getBlocks()424 TextBlock *getBlocks() { return blocks; } 425 426 // Get the next TextFlow on the linked list. getNext()427 TextFlow *getNext() { return next; } 428 429 private: 430 431 TextPage *page; // the parent page 432 double xMin, xMax; // bounding box x coordinates 433 double yMin, yMax; // bounding box y coordinates 434 double priMin, priMax; // whitespace bounding box along primary axis 435 TextBlock *blocks; // blocks in flow 436 TextBlock *lastBlk; // last block in this flow 437 TextFlow *next; 438 439 friend class TextWordList; 440 friend class TextPage; 441 }; 442 443 #if TEXTOUT_WORD_LIST 444 445 //------------------------------------------------------------------------ 446 // TextWordList 447 //------------------------------------------------------------------------ 448 449 class TextWordList { 450 public: 451 452 // Build a flat word list, in content stream order (if 453 // text->rawOrder is true), physical layout order (if <physLayout> 454 // is true and text->rawOrder is false), or reading order (if both 455 // flags are false). 456 TextWordList(TextPage *text, GBool physLayout); 457 458 ~TextWordList(); 459 460 // Return the number of words on the list. 461 int getLength(); 462 463 // Return the <idx>th word from the list. 464 TextWord *get(int idx); 465 466 private: 467 468 GooList *words; // [TextWord] 469 }; 470 471 #endif // TEXTOUT_WORD_LIST 472 473 //------------------------------------------------------------------------ 474 // TextPage 475 //------------------------------------------------------------------------ 476 477 class TextPage { 478 public: 479 480 // Constructor. 481 TextPage(GBool rawOrderA); 482 483 void incRefCnt(); 484 void decRefCnt(); 485 486 // Start a new page. 487 void startPage(GfxState *state); 488 489 // End the current page. 490 void endPage(); 491 492 // Update the current font. 493 void updateFont(GfxState *state); 494 495 // Begin a new word. 496 void beginWord(GfxState *state, double x0, double y0); 497 498 // Add a character to the current word. 499 void addChar(GfxState *state, double x, double y, 500 double dx, double dy, 501 CharCode c, int nBytes, Unicode *u, int uLen); 502 503 // End the current word, sorting it into the list of words. 504 void endWord(); 505 506 // Add a word, sorting it into the list of words. 507 void addWord(TextWord *word); 508 509 // Add a (potential) underline. 510 void addUnderline(double x0, double y0, double x1, double y1); 511 512 // Add a hyperlink. 513 void addLink(int xMin, int yMin, int xMax, int yMax, Link *link); 514 515 // Coalesce strings that look like parts of the same line. 516 void coalesce(GBool physLayout, GBool doHTML); 517 518 // Find a string. If <startAtTop> is true, starts looking at the 519 // top of the page; else if <startAtLast> is true, starts looking 520 // immediately after the last find result; else starts looking at 521 // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the 522 // bottom of the page; else if <stopAtLast> is true, stops looking 523 // just before the last find result; else stops looking at 524 // <xMax>,<yMax>. 525 GBool findText(Unicode *s, int len, 526 GBool startAtTop, GBool stopAtBottom, 527 GBool startAtLast, GBool stopAtLast, 528 GBool caseSensitive, GBool backward, 529 double *xMin, double *yMin, 530 double *xMax, double *yMax); 531 532 // Get the text which is inside the specified rectangle. 533 GooString *getText(double xMin, double yMin, 534 double xMax, double yMax); 535 536 void visitSelection(TextSelectionVisitor *visitor, 537 PDFRectangle *selection, 538 SelectionStyle style); 539 540 void drawSelection(OutputDev *out, 541 double scale, 542 int rotation, 543 PDFRectangle *selection, 544 SelectionStyle style, 545 GfxColor *glyph_color, GfxColor *box_color); 546 547 GooList *getSelectionRegion(PDFRectangle *selection, 548 SelectionStyle style, 549 double scale); 550 551 GooString *getSelectionText(PDFRectangle *selection, 552 SelectionStyle style); 553 554 // Find a string by character position and length. If found, sets 555 // the text bounding rectangle and returns true; otherwise returns 556 // false. 557 GBool findCharRange(int pos, int length, 558 double *xMin, double *yMin, 559 double *xMax, double *yMax); 560 561 // Dump contents of page to a file. 562 void dump(void *outputStream, TextOutputFunc outputFunc, 563 GBool physLayout); 564 565 // Get the head of the linked list of TextFlows. getFlows()566 TextFlow *getFlows() { return flows; } 567 568 #if TEXTOUT_WORD_LIST 569 // Build a flat word list, in content stream order (if 570 // this->rawOrder is true), physical layout order (if <physLayout> 571 // is true and this->rawOrder is false), or reading order (if both 572 // flags are false). 573 TextWordList *makeWordList(GBool physLayout); 574 #endif 575 576 private: 577 578 // Destructor. 579 ~TextPage(); 580 581 void clear(); 582 void assignColumns(TextLineFrag *frags, int nFrags, GBool rot); 583 int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GooString *s); 584 585 GBool rawOrder; // keep text in content stream order 586 587 double pageWidth, pageHeight; // width and height of current page 588 TextWord *curWord; // currently active string 589 int charPos; // next character position (within content 590 // stream) 591 TextFontInfo *curFont; // current font 592 double curFontSize; // current font size 593 int nest; // current nesting level (for Type 3 fonts) 594 int nTinyChars; // number of "tiny" chars seen so far 595 GBool lastCharOverlap; // set if the last added char overlapped the 596 // previous char 597 598 TextPool *pools[4]; // a "pool" of TextWords for each rotation 599 TextFlow *flows; // linked list of flows 600 TextBlock **blocks; // array of blocks, in yx order 601 int nBlocks; // number of blocks 602 int primaryRot; // primary rotation 603 GBool primaryLR; // primary direction (true means L-to-R, 604 // false means R-to-L) 605 TextWord *rawWords; // list of words, in raw order (only if 606 // rawOrder is set) 607 TextWord *rawLastWord; // last word on rawWords list 608 609 GooList *fonts; // all font info objects used on this 610 // page [TextFontInfo] 611 612 double lastFindXMin, // coordinates of the last "find" result 613 lastFindYMin; 614 GBool haveLastFind; 615 616 GooList *underlines; // [TextUnderline] 617 GooList *links; // [TextLink] 618 619 int refCnt; 620 621 friend class TextLine; 622 friend class TextLineFrag; 623 friend class TextBlock; 624 friend class TextFlow; 625 friend class TextWordList; 626 friend class TextSelectionPainter; 627 friend class TextSelectionDumper; 628 }; 629 630 //------------------------------------------------------------------------ 631 // ActualText 632 //------------------------------------------------------------------------ 633 634 class ActualText { 635 public: 636 // Create an ActualText 637 ActualText(TextPage *out); 638 ~ActualText(); 639 640 void addChar(GfxState *state, double x, double y, 641 double dx, double dy, 642 CharCode c, int nBytes, Unicode *u, int uLen); 643 void beginMC(Dict *properties); 644 void endMC(GfxState *state); 645 646 private: 647 TextPage *text; 648 int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented 649 // for each nested BMC inside the span. 650 GooString *actualText; // replacement text for the span 651 GBool newActualTextSpan; // true at start of span. used to init the extent 652 double actualText_x, actualText_y; // extent of the text inside the span 653 double actualText_dx, actualText_dy; 654 }; 655 656 657 //------------------------------------------------------------------------ 658 // TextOutputDev 659 //------------------------------------------------------------------------ 660 661 class TextOutputDev: public OutputDev { 662 public: 663 664 // Open a text output file. If <fileName> is NULL, no file is 665 // written (this is useful, e.g., for searching text). If 666 // <physLayoutA> is true, the original physical layout of the text 667 // is maintained. If <rawOrder> is true, the text is kept in 668 // content stream order. 669 TextOutputDev(char *fileName, GBool physLayoutA, 670 GBool rawOrderA, GBool append); 671 672 // Create a TextOutputDev which will write to a generic stream. If 673 // <physLayoutA> is true, the original physical layout of the text 674 // is maintained. If <rawOrder> is true, the text is kept in 675 // content stream order. 676 TextOutputDev(TextOutputFunc func, void *stream, 677 GBool physLayoutA, GBool rawOrderA); 678 679 // Destructor. 680 virtual ~TextOutputDev(); 681 682 // Check if file was successfully created. isOk()683 virtual GBool isOk() { return ok; } 684 685 //---- get info about output device 686 687 // Does this device use upside-down coordinates? 688 // (Upside-down means (0,0) is the top left corner of the page.) upsideDown()689 virtual GBool upsideDown() { return gTrue; } 690 691 // Does this device use drawChar() or drawString()? useDrawChar()692 virtual GBool useDrawChar() { return gTrue; } 693 694 // Does this device use beginType3Char/endType3Char? Otherwise, 695 // text in Type 3 fonts will be drawn with drawChar/drawString. interpretType3Chars()696 virtual GBool interpretType3Chars() { return gFalse; } 697 698 // Does this device need non-text content? needNonText()699 virtual GBool needNonText() { return gFalse; } 700 701 //----- initialization and control 702 703 // Start a page. 704 virtual void startPage(int pageNum, GfxState *state); 705 706 // End a page. 707 virtual void endPage(); 708 709 //----- update text state 710 virtual void updateFont(GfxState *state); 711 712 //----- text drawing 713 virtual void beginString(GfxState *state, GooString *s); 714 virtual void endString(GfxState *state); 715 virtual void drawChar(GfxState *state, double x, double y, 716 double dx, double dy, 717 double originX, double originY, 718 CharCode c, int nBytes, Unicode *u, int uLen); 719 720 //----- grouping operators 721 virtual void beginMarkedContent(char *name, Dict *properties); 722 virtual void endMarkedContent(GfxState *state); 723 724 //----- path painting 725 virtual void stroke(GfxState *state); 726 virtual void fill(GfxState *state); 727 virtual void eoFill(GfxState *state); 728 729 //----- link borders 730 virtual void processLink(Link *link, Catalog *catalog); 731 732 //----- special access 733 734 // Find a string. If <startAtTop> is true, starts looking at the 735 // top of the page; else if <startAtLast> is true, starts looking 736 // immediately after the last find result; else starts looking at 737 // <xMin>,<yMin>. If <stopAtBottom> is true, stops looking at the 738 // bottom of the page; else if <stopAtLast> is true, stops looking 739 // just before the last find result; else stops looking at 740 // <xMax>,<yMax>. 741 GBool findText(Unicode *s, int len, 742 GBool startAtTop, GBool stopAtBottom, 743 GBool startAtLast, GBool stopAtLast, 744 GBool caseSensitive, GBool backward, 745 double *xMin, double *yMin, 746 double *xMax, double *yMax); 747 748 // Get the text which is inside the specified rectangle. 749 GooString *getText(double xMin, double yMin, 750 double xMax, double yMax); 751 752 // Find a string by character position and length. If found, sets 753 // the text bounding rectangle and returns true; otherwise returns 754 // false. 755 GBool findCharRange(int pos, int length, 756 double *xMin, double *yMin, 757 double *xMax, double *yMax); 758 759 void drawSelection(OutputDev *out, double scale, int rotation, 760 PDFRectangle *selection, 761 SelectionStyle style, 762 GfxColor *glyph_color, GfxColor *box_color); 763 764 GooList *getSelectionRegion(PDFRectangle *selection, 765 SelectionStyle style, 766 double scale); 767 768 GooString *getSelectionText(PDFRectangle *selection, 769 SelectionStyle style); 770 771 #if TEXTOUT_WORD_LIST 772 // Build a flat word list, in content stream order (if 773 // this->rawOrder is true), physical layout order (if 774 // this->physLayout is true and this->rawOrder is false), or reading 775 // order (if both flags are false). 776 TextWordList *makeWordList(); 777 #endif 778 779 // Returns the TextPage object for the last rasterized page, 780 // transferring ownership to the caller. 781 TextPage *takeText(); 782 783 // Turn extra processing for HTML conversion on or off. enableHTMLExtras(GBool doHTMLA)784 void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; } 785 786 private: 787 788 TextOutputFunc outputFunc; // output function 789 void *outputStream; // output stream 790 GBool needClose; // need to close the output file? 791 // (only if outputStream is a FILE*) 792 TextPage *text; // text for the current page 793 GBool physLayout; // maintain original physical layout when 794 // dumping text 795 GBool rawOrder; // keep text in content stream order 796 GBool doHTML; // extra processing for HTML conversion 797 GBool ok; // set up ok? 798 799 ActualText *actualText; 800 }; 801 802 #endif 803