1 //========================================================================
2 //
3 // TextOutputDev.h
4 //
5 // Copyright 1997-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com>
14 // Copyright (C) 2006 Ed Catmur <ed@catmur.co.uk>
15 // Copyright (C) 2007-2008 Carlos Garcia Campos <carlosgc@gnome.org>
16 // Copyright (C) 2007 Adrian Johnson <ajohnson@redneon.com>
17 // Copyright (C) 2008, 2010 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com>
19 //
20 // To see a description of the changes please see the Changelog file that
21 // came with your tarball or type make ChangeLog if you are building from git
22 //
23 //========================================================================
24 
25 #ifndef TEXTOUTPUTDEV_H
26 #define TEXTOUTPUTDEV_H
27 
28 #ifdef USE_GCC_PRAGMAS
29 #pragma interface
30 #endif
31 
32 #include "poppler-config.h"
33 #include <stdio.h>
34 #include "goo/gtypes.h"
35 #include "GfxFont.h"
36 #include "GfxState.h"
37 #include "OutputDev.h"
38 
39 class GooString;
40 class GooList;
41 class Gfx;
42 class GfxFont;
43 class GfxState;
44 class UnicodeMap;
45 class Link;
46 
47 class TextWord;
48 class TextPool;
49 class TextLine;
50 class TextLineFrag;
51 class TextBlock;
52 class TextFlow;
53 class TextWordList;
54 class TextPage;
55 class TextSelectionVisitor;
56 
57 //------------------------------------------------------------------------
58 
// Signature of the text output callback: it is handed an opaque
// <stream> pointer, a <text> buffer, and a length <len>.
typedef void (*TextOutputFunc)(void *stream,
                               char *text,
                               int len);
60 
// Style argument accepted by the selection functions declared in this
// header (visitSelection, drawSelection, getSelectionRegion,
// getSelectionText).  Enumerators take the default values 0, 1, 2.
enum SelectionStyle {
  selectionStyleGlyph,          // 0
  selectionStyleWord,           // 1
  selectionStyleLine            // 2
};
66 
67 //------------------------------------------------------------------------
68 // TextFontInfo
69 //------------------------------------------------------------------------
70 
71 class TextFontInfo {
72 public:
73 
74   TextFontInfo(GfxState *state);
75   ~TextFontInfo();
76 
77   GBool matches(GfxState *state);
78 
79 #if TEXTOUT_WORD_LIST
80   // Get the font name (which may be NULL).
getFontName()81   GooString *getFontName() { return fontName; }
82 
83   // Get font descriptor flags.
isFixedWidth()84   GBool isFixedWidth() { return flags & fontFixedWidth; }
isSerif()85   GBool isSerif() { return flags & fontSerif; }
isSymbolic()86   GBool isSymbolic() { return flags & fontSymbolic; }
isItalic()87   GBool isItalic() { return flags & fontItalic; }
isBold()88   GBool isBold() { return flags & fontBold; }
89 #endif
90 
91 private:
92 
93   GfxFont *gfxFont;
94 #if TEXTOUT_WORD_LIST
95   GooString *fontName;
96   int flags;
97 #endif
98 
99   friend class TextWord;
100   friend class TextPage;
101   friend class TextSelectionPainter;
102 };
103 
104 //------------------------------------------------------------------------
105 // TextWord
106 //------------------------------------------------------------------------
107 
108 class TextWord {
109 public:
110 
111   // Constructor.
112   TextWord(GfxState *state, int rotA, double x0, double y0,
113 	   int charPosA, TextFontInfo *fontA, double fontSize);
114 
115   // Destructor.
116   ~TextWord();
117 
118   // Add a character to the word.
119   void addChar(GfxState *state, double x, double y,
120 	       double dx, double dy, CharCode c, Unicode u);
121 
122   // Merge <word> onto the end of <this>.
123   void merge(TextWord *word);
124 
125   // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
126   // based on a primary-axis comparison, e.g., x ordering if rot=0.
127   int primaryCmp(TextWord *word);
128 
129   // Return the distance along the primary axis between <this> and
130   // <word>.
131   double primaryDelta(TextWord *word);
132 
133   static int cmpYX(const void *p1, const void *p2);
134 
135   void visitSelection(TextSelectionVisitor *visitor,
136 		      PDFRectangle *selection,
137 		      SelectionStyle style);
138 
139   // Get the TextFontInfo object associated with this word.
getFontInfo()140   TextFontInfo *getFontInfo() { return font; }
141 
142   // Get the next TextWord on the linked list.
getNext()143   TextWord *getNext() { return next; }
144 
145 #if TEXTOUT_WORD_LIST
getLength()146   int getLength() { return len; }
getChar(int idx)147   const Unicode *getChar(int idx) { return &text[idx]; }
148   GooString *getText();
getFontName()149   GooString *getFontName() { return font->fontName; }
getColor(double * r,double * g,double * b)150   void getColor(double *r, double *g, double *b)
151     { *r = colorR; *g = colorG; *b = colorB; }
getBBox(double * xMinA,double * yMinA,double * xMaxA,double * yMaxA)152   void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
153     { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
154   void getCharBBox(int charIdx, double *xMinA, double *yMinA,
155 		   double *xMaxA, double *yMaxA);
getFontSize()156   double getFontSize() { return fontSize; }
getRotation()157   int getRotation() { return rot; }
getCharPos()158   int getCharPos() { return charPos; }
getCharLen()159   int getCharLen() { return charLen; }
getSpaceAfter()160   GBool getSpaceAfter() { return spaceAfter; }
161 #endif
isUnderlined()162   GBool isUnderlined() { return underlined; }
getLink()163   Link *getLink() { return link; }
getEdge(int i)164   double getEdge(int i) { return edge[i]; }
getBaseline()165   double getBaseline () { return base; }
hasSpaceAfter()166   GBool hasSpaceAfter  () { return spaceAfter; }
nextWord()167   TextWord* nextWord () { return next; };
168 private:
169 
170   int rot;			// rotation, multiple of 90 degrees
171 				//   (0, 1, 2, or 3)
172   double xMin, xMax;		// bounding box x coordinates
173   double yMin, yMax;		// bounding box y coordinates
174   double base;			// baseline x or y coordinate
175   Unicode *text;		// the text
176   CharCode *charcode;		// glyph indices
177   double *edge;			// "near" edge x or y coord of each char
178 				//   (plus one extra entry for the last char)
179   int len;			// length of text and edge arrays
180   int size;			// size of text and edge arrays
181   int charPos;                  // character position (within content stream)
182   int charLen;                  // number of content stream characters in
183                                 //   this word
184   TextFontInfo *font;		// font information
185   double fontSize;		// font size
186   GBool spaceAfter;		// set if there is a space between this
187 				//   word and the next word on the line
188   TextWord *next;		// next word in line
189 
190 #if TEXTOUT_WORD_LIST
191   double colorR,		// word color
192          colorG,
193          colorB;
194 #endif
195 
196   GBool underlined;
197   Link *link;
198 
199   friend class TextPool;
200   friend class TextLine;
201   friend class TextBlock;
202   friend class TextFlow;
203   friend class TextWordList;
204   friend class TextPage;
205 
206   friend class TextSelectionPainter;
207   friend class TextSelectionDumper;
208 };
209 
210 //------------------------------------------------------------------------
211 // TextPool
212 //------------------------------------------------------------------------
213 
214 class TextPool {
215 public:
216 
217   TextPool();
218   ~TextPool();
219 
getPool(int baseIdx)220   TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
setPool(int baseIdx,TextWord * p)221   void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
222 
223   int getBaseIdx(double base);
224 
225   void addWord(TextWord *word);
226 
227 private:
228 
229   int minBaseIdx;		// min baseline bucket index
230   int maxBaseIdx;		// max baseline bucket index
231   TextWord **pool;		// array of linked lists, one for each
232 				//   baseline value (multiple of 4 pts)
233   TextWord *cursor;		// pointer to last-accessed word
234   int cursorBaseIdx;		// baseline bucket index of last-accessed word
235 
236   friend class TextBlock;
237   friend class TextPage;
238 };
239 
240 struct TextFlowData;
241 
242 //------------------------------------------------------------------------
243 // TextLine
244 //------------------------------------------------------------------------
245 
246 class TextLine {
247 public:
248 
249   TextLine(TextBlock *blkA, int rotA, double baseA);
250   ~TextLine();
251 
252   void addWord(TextWord *word);
253 
254   // Return the distance along the primary axis between <this> and
255   // <line>.
256   double primaryDelta(TextLine *line);
257 
258   // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
259   // based on a primary-axis comparison, e.g., x ordering if rot=0.
260   int primaryCmp(TextLine *line);
261 
262   // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
263   // based on a secondary-axis comparison of the baselines, e.g., y
264   // ordering if rot=0.
265   int secondaryCmp(TextLine *line);
266 
267   int cmpYX(TextLine *line);
268 
269   static int cmpXY(const void *p1, const void *p2);
270 
271   void coalesce(UnicodeMap *uMap);
272 
273   void visitSelection(TextSelectionVisitor *visitor,
274 		      PDFRectangle *selection,
275 		      SelectionStyle style);
276 
277   // Get the head of the linked list of TextWords.
getWords()278   TextWord *getWords() { return words; }
279 
280   // Get the next TextLine on the linked list.
getNext()281   TextLine *getNext() { return next; }
282 
283   // Returns true if the last char of the line is a hyphen.
isHyphenated()284   GBool isHyphenated() { return hyphenated; }
285 
286 private:
287 
288   TextBlock *blk;		// parent block
289   int rot;			// text rotation
290   double xMin, xMax;		// bounding box x coordinates
291   double yMin, yMax;		// bounding box y coordinates
292   double base;			// baseline x or y coordinate
293   TextWord *words;		// words in this line
294   TextWord *lastWord;		// last word in this line
295   Unicode *text;		// Unicode text of the line, including
296 				//   spaces between words
297   double *edge;			// "near" edge x or y coord of each char
298 				//   (plus one extra entry for the last char)
299   int *col;			// starting column number of each Unicode char
300   int len;			// number of Unicode chars
301   int convertedLen;		// total number of converted characters
302   GBool hyphenated;		// set if last char is a hyphen
303   TextLine *next;		// next line in block
304   Unicode *normalized;		// normalized form of Unicode text
305   int normalized_len;		// number of normalized Unicode chars
306   int *normalized_idx;		// indices of normalized chars into Unicode text
307 
308   friend class TextLineFrag;
309   friend class TextBlock;
310   friend class TextFlow;
311   friend class TextWordList;
312   friend class TextPage;
313 
314   friend class TextSelectionPainter;
315   friend class TextSelectionSizer;
316   friend class TextSelectionDumper;
317 };
318 
319 //------------------------------------------------------------------------
320 // TextBlock
321 //------------------------------------------------------------------------
322 
323 class TextBlock {
324 public:
325 
326   TextBlock(TextPage *pageA, int rotA);
327   ~TextBlock();
328 
329   void addWord(TextWord *word);
330 
331   void coalesce(UnicodeMap *uMap);
332 
333   // Update this block's priMin and priMax values, looking at <blk>.
334   void updatePriMinMax(TextBlock *blk);
335 
336   static int cmpXYPrimaryRot(const void *p1, const void *p2);
337 
338   static int cmpYXPrimaryRot(const void *p1, const void *p2);
339 
340   int primaryCmp(TextBlock *blk);
341 
342   double secondaryDelta(TextBlock *blk);
343 
344   // Returns true if <this> is below <blk>, relative to the page's
345   // primary rotation.
346   GBool isBelow(TextBlock *blk);
347 
348   void visitSelection(TextSelectionVisitor *visitor,
349 		      PDFRectangle *selection,
350 		      SelectionStyle style);
351 
352   // Get the head of the linked list of TextLines.
getLines()353   TextLine *getLines() { return lines; }
354 
355   // Get the next TextBlock on the linked list.
getNext()356   TextBlock *getNext() { return next; }
357 
getBBox(double * xMinA,double * yMinA,double * xMaxA,double * yMaxA)358   void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
359     { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
360 
getLineCount()361   int getLineCount() { return nLines; }
362 
363 private:
364 
365   GBool isBeforeByRule1(TextBlock *blk1);
366   GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1);
367   GBool isBeforeByRule2(TextBlock *blk1);
368 
369   int visitDepthFirst(TextBlock *blkList, int pos1,
370 		      TextBlock **sorted, int sortPos,
371 		      GBool* visited);
372 
373   TextPage *page;		// the parent page
374   int rot;			// text rotation
375   double xMin, xMax;		// bounding box x coordinates
376   double yMin, yMax;		// bounding box y coordinates
377   double priMin, priMax;	// whitespace bounding box along primary axis
378   double ExMin, ExMax;		// extended bounding box x coordinates
379   double EyMin, EyMax;		// extended bounding box y coordinates
380   int tableId;			// id of table to which this block belongs
381   GBool tableEnd;		// is this block at end of line of actual table
382 
383   TextPool *pool;		// pool of words (used only until lines
384 				//   are built)
385   TextLine *lines;		// linked list of lines
386   TextLine *curLine;		// most recently added line
387   int nLines;			// number of lines
388   int charCount;		// number of characters in the block
389   int col;			// starting column
390   int nColumns;			// number of columns in the block
391 
392   TextBlock *next;
393   TextBlock *stackNext;
394 
395   friend class TextLine;
396   friend class TextLineFrag;
397   friend class TextFlow;
398   friend class TextWordList;
399   friend class TextPage;
400   friend class TextSelectionPainter;
401   friend class TextSelectionDumper;
402 };
403 
404 //------------------------------------------------------------------------
405 // TextFlow
406 //------------------------------------------------------------------------
407 
408 class TextFlow {
409 public:
410 
411   TextFlow(TextPage *pageA, TextBlock *blk);
412   ~TextFlow();
413 
414   // Add a block to the end of this flow.
415   void addBlock(TextBlock *blk);
416 
417   // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
418   // it uses a font no larger than the last block added to the flow,
419   // and (2) it fits within the flow's [priMin, priMax] along the
420   // primary axis.
421   GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
422 
423   // Get the head of the linked list of TextBlocks.
getBlocks()424   TextBlock *getBlocks() { return blocks; }
425 
426   // Get the next TextFlow on the linked list.
getNext()427   TextFlow *getNext() { return next; }
428 
429 private:
430 
431   TextPage *page;		// the parent page
432   double xMin, xMax;		// bounding box x coordinates
433   double yMin, yMax;		// bounding box y coordinates
434   double priMin, priMax;	// whitespace bounding box along primary axis
435   TextBlock *blocks;		// blocks in flow
436   TextBlock *lastBlk;		// last block in this flow
437   TextFlow *next;
438 
439   friend class TextWordList;
440   friend class TextPage;
441 };
442 
443 #if TEXTOUT_WORD_LIST
444 
445 //------------------------------------------------------------------------
446 // TextWordList
447 //------------------------------------------------------------------------
448 
449 class TextWordList {
450 public:
451 
452   // Build a flat word list, in content stream order (if
453   // text->rawOrder is true), physical layout order (if <physLayout>
454   // is true and text->rawOrder is false), or reading order (if both
455   // flags are false).
456   TextWordList(TextPage *text, GBool physLayout);
457 
458   ~TextWordList();
459 
460   // Return the number of words on the list.
461   int getLength();
462 
463   // Return the <idx>th word from the list.
464   TextWord *get(int idx);
465 
466 private:
467 
468   GooList *words;			// [TextWord]
469 };
470 
471 #endif // TEXTOUT_WORD_LIST
472 
473 //------------------------------------------------------------------------
474 // TextPage
475 //------------------------------------------------------------------------
476 
477 class TextPage {
478 public:
479 
480   // Constructor.
481   TextPage(GBool rawOrderA);
482 
483   void incRefCnt();
484   void decRefCnt();
485 
486   // Start a new page.
487   void startPage(GfxState *state);
488 
489   // End the current page.
490   void endPage();
491 
492   // Update the current font.
493   void updateFont(GfxState *state);
494 
495   // Begin a new word.
496   void beginWord(GfxState *state, double x0, double y0);
497 
498   // Add a character to the current word.
499   void addChar(GfxState *state, double x, double y,
500 	       double dx, double dy,
501 	       CharCode c, int nBytes, Unicode *u, int uLen);
502 
503   // End the current word, sorting it into the list of words.
504   void endWord();
505 
506   // Add a word, sorting it into the list of words.
507   void addWord(TextWord *word);
508 
509   // Add a (potential) underline.
510   void addUnderline(double x0, double y0, double x1, double y1);
511 
512   // Add a hyperlink.
513   void addLink(int xMin, int yMin, int xMax, int yMax, Link *link);
514 
515   // Coalesce strings that look like parts of the same line.
516   void coalesce(GBool physLayout, GBool doHTML);
517 
518   // Find a string.  If <startAtTop> is true, starts looking at the
519   // top of the page; else if <startAtLast> is true, starts looking
520   // immediately after the last find result; else starts looking at
521   // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
522   // bottom of the page; else if <stopAtLast> is true, stops looking
523   // just before the last find result; else stops looking at
524   // <xMax>,<yMax>.
525   GBool findText(Unicode *s, int len,
526 		 GBool startAtTop, GBool stopAtBottom,
527 		 GBool startAtLast, GBool stopAtLast,
528 		 GBool caseSensitive, GBool backward,
529 		 double *xMin, double *yMin,
530 		 double *xMax, double *yMax);
531 
532   // Get the text which is inside the specified rectangle.
533   GooString *getText(double xMin, double yMin,
534 		     double xMax, double yMax);
535 
536   void visitSelection(TextSelectionVisitor *visitor,
537 		      PDFRectangle *selection,
538 		      SelectionStyle style);
539 
540   void drawSelection(OutputDev *out,
541 		     double scale,
542 		     int rotation,
543 		     PDFRectangle *selection,
544 		     SelectionStyle style,
545 		     GfxColor *glyph_color, GfxColor *box_color);
546 
547   GooList *getSelectionRegion(PDFRectangle *selection,
548 			      SelectionStyle style,
549 			      double scale);
550 
551   GooString *getSelectionText(PDFRectangle *selection,
552 			      SelectionStyle style);
553 
554   // Find a string by character position and length.  If found, sets
555   // the text bounding rectangle and returns true; otherwise returns
556   // false.
557   GBool findCharRange(int pos, int length,
558 		      double *xMin, double *yMin,
559 		      double *xMax, double *yMax);
560 
561   // Dump contents of page to a file.
562   void dump(void *outputStream, TextOutputFunc outputFunc,
563 	    GBool physLayout);
564 
565   // Get the head of the linked list of TextFlows.
getFlows()566   TextFlow *getFlows() { return flows; }
567 
568 #if TEXTOUT_WORD_LIST
569   // Build a flat word list, in content stream order (if
570   // this->rawOrder is true), physical layout order (if <physLayout>
571   // is true and this->rawOrder is false), or reading order (if both
572   // flags are false).
573   TextWordList *makeWordList(GBool physLayout);
574 #endif
575 
576 private:
577 
578   // Destructor.
579   ~TextPage();
580 
581   void clear();
582   void assignColumns(TextLineFrag *frags, int nFrags, GBool rot);
583   int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GooString *s);
584 
585   GBool rawOrder;		// keep text in content stream order
586 
587   double pageWidth, pageHeight;	// width and height of current page
588   TextWord *curWord;		// currently active string
589   int charPos;			// next character position (within content
590 				//   stream)
591   TextFontInfo *curFont;	// current font
592   double curFontSize;		// current font size
593   int nest;			// current nesting level (for Type 3 fonts)
594   int nTinyChars;		// number of "tiny" chars seen so far
595   GBool lastCharOverlap;	// set if the last added char overlapped the
596 				//   previous char
597 
598   TextPool *pools[4];		// a "pool" of TextWords for each rotation
599   TextFlow *flows;		// linked list of flows
600   TextBlock **blocks;		// array of blocks, in yx order
601   int nBlocks;			// number of blocks
602   int primaryRot;		// primary rotation
603   GBool primaryLR;		// primary direction (true means L-to-R,
604 				//   false means R-to-L)
605   TextWord *rawWords;		// list of words, in raw order (only if
606 				//   rawOrder is set)
607   TextWord *rawLastWord;	// last word on rawWords list
608 
609   GooList *fonts;			// all font info objects used on this
610 				//   page [TextFontInfo]
611 
612   double lastFindXMin,		// coordinates of the last "find" result
613          lastFindYMin;
614   GBool haveLastFind;
615 
616   GooList *underlines;		// [TextUnderline]
617   GooList *links;		// [TextLink]
618 
619   int refCnt;
620 
621   friend class TextLine;
622   friend class TextLineFrag;
623   friend class TextBlock;
624   friend class TextFlow;
625   friend class TextWordList;
626   friend class TextSelectionPainter;
627   friend class TextSelectionDumper;
628 };
629 
630 //------------------------------------------------------------------------
631 // ActualText
632 //------------------------------------------------------------------------
633 
634 class ActualText {
635 public:
636   // Create an ActualText
637   ActualText(TextPage *out);
638   ~ActualText();
639 
640   void addChar(GfxState *state, double x, double y,
641 	       double dx, double dy,
642 	       CharCode c, int nBytes, Unicode *u, int uLen);
643   void beginMC(Dict *properties);
644   void endMC(GfxState *state);
645 
646 private:
647   TextPage *text;
648   int actualTextBMCLevel;       // > 0 when inside ActualText span. Incremented
649                                 // for each nested BMC inside the span.
650   GooString *actualText;        // replacement text for the span
651   GBool newActualTextSpan;      // true at start of span. used to init the extent
652   double actualText_x, actualText_y; // extent of the text inside the span
653   double actualText_dx, actualText_dy;
654 };
655 
656 
657 //------------------------------------------------------------------------
658 // TextOutputDev
659 //------------------------------------------------------------------------
660 
661 class TextOutputDev: public OutputDev {
662 public:
663 
664   // Open a text output file.  If <fileName> is NULL, no file is
665   // written (this is useful, e.g., for searching text).  If
666   // <physLayoutA> is true, the original physical layout of the text
667   // is maintained.  If <rawOrder> is true, the text is kept in
668   // content stream order.
669   TextOutputDev(char *fileName, GBool physLayoutA,
670 		GBool rawOrderA, GBool append);
671 
672   // Create a TextOutputDev which will write to a generic stream.  If
673   // <physLayoutA> is true, the original physical layout of the text
674   // is maintained.  If <rawOrder> is true, the text is kept in
675   // content stream order.
676   TextOutputDev(TextOutputFunc func, void *stream,
677 		GBool physLayoutA, GBool rawOrderA);
678 
679   // Destructor.
680   virtual ~TextOutputDev();
681 
682   // Check if file was successfully created.
isOk()683   virtual GBool isOk() { return ok; }
684 
685   //---- get info about output device
686 
687   // Does this device use upside-down coordinates?
688   // (Upside-down means (0,0) is the top left corner of the page.)
upsideDown()689   virtual GBool upsideDown() { return gTrue; }
690 
691   // Does this device use drawChar() or drawString()?
useDrawChar()692   virtual GBool useDrawChar() { return gTrue; }
693 
694   // Does this device use beginType3Char/endType3Char?  Otherwise,
695   // text in Type 3 fonts will be drawn with drawChar/drawString.
interpretType3Chars()696   virtual GBool interpretType3Chars() { return gFalse; }
697 
698   // Does this device need non-text content?
needNonText()699   virtual GBool needNonText() { return gFalse; }
700 
701   //----- initialization and control
702 
703   // Start a page.
704   virtual void startPage(int pageNum, GfxState *state);
705 
706   // End a page.
707   virtual void endPage();
708 
709   //----- update text state
710   virtual void updateFont(GfxState *state);
711 
712   //----- text drawing
713   virtual void beginString(GfxState *state, GooString *s);
714   virtual void endString(GfxState *state);
715   virtual void drawChar(GfxState *state, double x, double y,
716 			double dx, double dy,
717 			double originX, double originY,
718 			CharCode c, int nBytes, Unicode *u, int uLen);
719 
720   //----- grouping operators
721   virtual void beginMarkedContent(char *name, Dict *properties);
722   virtual void endMarkedContent(GfxState *state);
723 
724   //----- path painting
725   virtual void stroke(GfxState *state);
726   virtual void fill(GfxState *state);
727   virtual void eoFill(GfxState *state);
728 
729   //----- link borders
730   virtual void processLink(Link *link, Catalog *catalog);
731 
732   //----- special access
733 
734   // Find a string.  If <startAtTop> is true, starts looking at the
735   // top of the page; else if <startAtLast> is true, starts looking
736   // immediately after the last find result; else starts looking at
737   // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
738   // bottom of the page; else if <stopAtLast> is true, stops looking
739   // just before the last find result; else stops looking at
740   // <xMax>,<yMax>.
741   GBool findText(Unicode *s, int len,
742 		 GBool startAtTop, GBool stopAtBottom,
743 		 GBool startAtLast, GBool stopAtLast,
744 		 GBool caseSensitive, GBool backward,
745 		 double *xMin, double *yMin,
746 		 double *xMax, double *yMax);
747 
748   // Get the text which is inside the specified rectangle.
749   GooString *getText(double xMin, double yMin,
750 		   double xMax, double yMax);
751 
752   // Find a string by character position and length.  If found, sets
753   // the text bounding rectangle and returns true; otherwise returns
754   // false.
755   GBool findCharRange(int pos, int length,
756 		      double *xMin, double *yMin,
757 		      double *xMax, double *yMax);
758 
759   void drawSelection(OutputDev *out, double scale, int rotation,
760 		     PDFRectangle *selection,
761 		     SelectionStyle style,
762 		     GfxColor *glyph_color, GfxColor *box_color);
763 
764   GooList *getSelectionRegion(PDFRectangle *selection,
765 			      SelectionStyle style,
766 			      double scale);
767 
768   GooString *getSelectionText(PDFRectangle *selection,
769 			      SelectionStyle style);
770 
771 #if TEXTOUT_WORD_LIST
772   // Build a flat word list, in content stream order (if
773   // this->rawOrder is true), physical layout order (if
774   // this->physLayout is true and this->rawOrder is false), or reading
775   // order (if both flags are false).
776   TextWordList *makeWordList();
777 #endif
778 
779   // Returns the TextPage object for the last rasterized page,
780   // transferring ownership to the caller.
781   TextPage *takeText();
782 
783   // Turn extra processing for HTML conversion on or off.
enableHTMLExtras(GBool doHTMLA)784   void enableHTMLExtras(GBool doHTMLA) { doHTML = doHTMLA; }
785 
786 private:
787 
788   TextOutputFunc outputFunc;	// output function
789   void *outputStream;		// output stream
790   GBool needClose;		// need to close the output file?
791 				//   (only if outputStream is a FILE*)
792   TextPage *text;		// text for the current page
793   GBool physLayout;		// maintain original physical layout when
794 				//   dumping text
795   GBool rawOrder;		// keep text in content stream order
796   GBool doHTML;			// extra processing for HTML conversion
797   GBool ok;			// set up ok?
798 
799   ActualText *actualText;
800 };
801 
802 #endif
803