1 /*
2 For general Scribus (>=1.3.2) copyright and licensing information please refer
3 to the COPYING file provided with the program. Following this notice may exist
4 a copyright and/or license notice that predates the release of Scribus 1.3.2
5 for which a new license (GPL+exception) is in place.
6 */
7 #ifndef PDFTEXTRECOGNITION_H
8 #define PDFTEXTRECOGNITION_H
9 
10 #include <QSizeF>
11 #include <QStack>
12 #include <QString>
13 
14 #include "pageitem.h"
15 #include "importpdfconfig.h"
16 #include "slaoutput.h"
17 
18 #include <poppler/GfxState.h>
19 #include <poppler/CharCodeToUnicode.h>
20 
/* PDF TextBox Framework */
/*
 * Holds all the details for each glyph in the text imported from the PDF file.
 */
26 struct PdfGlyph
27 {
28 	double dx;  // X advance value
29 	double dy;  // Y advance value
30 	double rise;    // Text rise parameter
31 	QChar code;   // UTF-16 coded character
32 };
33 
34 
35 class PdfTextRegionLine
36 {
37 public:
38 	qreal maxHeight = {};
39 	//we can probably use maxHeight for this.
40 	qreal width = {};
41 	int glyphIndex = {};
42 	QPointF baseOrigin = QPointF({}, {});
43 	std::vector<PdfTextRegionLine> segments = std::vector<PdfTextRegionLine>();
44 
45 };
46 
47 class PdfTextRegion
48 {
49 public:
50 	enum class LineType
51 	{
52 		FIRSTPOINT,
53 		SAMELINE,
54 		STYLESUPERSCRIPT,
55 		STYLENORMALRETURN,
56 		STYLEBELOWBASELINE,
57 		NEWLINE,
58 		ENDOFLINE, //TODO: Implement an end of line test
59 		FAIL
60 	};
61 #
62 	/*
63 * the bounding box shape splines in percentage of width and height. In this case 100% as we want to clip shape to be the full TextBox width and height. */
64 	static constexpr double boundingBoxShape[32] = {
65 		0.0, 0.0,
66 		0.0, 0.0,
67 		100.0, 0.0,
68 		100.0, 0.0,
69 		100.0, 0.0,
70 		100.0, 0.0,
71 		100.0, 100.0,
72 		100.0, 100.0,
73 		100.0, 100.0,
74 		100.0, 100.0,
75 		0.0, 100.0,
76 		0.0, 100.0,
77 		0.0, 100.0,
78 		0.0, 100.0,
79 		0.0, 0.0,
80 		0.0, 0.0
81 	};
82 
83 	QPointF pdfTextRegionBasenOrigin = QPointF({}, {});
84 	qreal maxHeight = {};
85 	qreal lineSpacing = { 1 };
86 	std::vector<PdfTextRegionLine> pdfTextRegionLines = std::vector<PdfTextRegionLine>();
87 	qreal maxWidth = {};
88 	QPointF lineBaseXY = QPointF({ }, { }); //updated with the best match left value from all the textRegionLines and the best bottom value from the textRegionLines.segments;
89 	QPointF lastXY = QPointF({}, {});
90 	std::vector<PdfGlyph> glyphs;
91 
92 	static bool collinear(qreal a, qreal b);
93 	bool isCloseToX(qreal x1, qreal x2);
94 	bool isCloseToY(qreal y1, qreal y2);
95 	bool adjunctLesser(qreal testY, qreal lastY, qreal baseY);
96 	bool adjunctGreater(qreal testY, qreal lastY, qreal baseY);
97 	PdfTextRegion::LineType linearTest(QPointF point, bool xInLimits, bool yInLimits);
98 	PdfTextRegion::LineType isRegionConcurrent(QPointF newPoint);
99 	PdfTextRegion::LineType moveToPoint(QPointF newPoint);
100 	PdfTextRegion::LineType addGlyphAtPoint(QPointF newGlyphPoint, PdfGlyph new_glyph);
101 	void renderToTextFrame(PageItem* textNode);
102 	bool isNew();
103 };
104 
105 class PdfTextRecognition
106 {
107 public:
108 	PdfTextRecognition();
109 	~PdfTextRecognition();
110 
111 	enum class AddCharMode
112 	{
113 		ADDFIRSTCHAR,
114 		ADDBASICCHAR,
115 		ADDCHARWITHNEWSTYLE,
116 		ADDCHARWITHPREVIOUSSTYLE,
117 		ADDCHARWITHBASESTLYE
118 	};
119 
setCharMode(AddCharMode mode)120 	void setCharMode(AddCharMode mode)
121 	{
122 		m_addCharMode = mode;
123 	}
124 
125 	PdfTextRegion* activePdfTextRegion = nullptr; //faster and cleaner than calling back on the vector all the time.
126 
127 	void addPdfTextRegion();
128 	void addChar(GfxState* state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, POPPLER_CONST_082 Unicode* u, int uLen);
129 	bool isNewLineOrRegion(QPointF newPosition);
130 
131 private:
132 	std::vector<PdfTextRegion> m_pdfTextRegions = std::vector<PdfTextRegion>();
133 	AddCharMode m_addCharMode = AddCharMode::ADDFIRSTCHAR;
134 	PdfGlyph AddCharCommon(GfxState* state, double x, double y, double dx, double dy, Unicode const* u, int uLen);
135 	PdfGlyph AddFirstChar(GfxState* state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode const* u, int uLen);
136 	PdfGlyph AddBasicChar(GfxState* state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode const* u, int uLen);
137 	PdfGlyph AddCharWithNewStyle(GfxState* state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode const* u, int uLen);
138 	PdfGlyph AddCharWithPreviousStyle(GfxState* state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode const* u, int uLen);
139 };
140 
141 
142 class PdfTextOutputDev : public SlaOutputDev
143 {
144 public:
145 	PdfTextOutputDev(ScribusDoc* doc, QList<PageItem*>* Elements, QStringList* importedColors, int flags);
146 	virtual ~PdfTextOutputDev();
147 
148 	void updateFont(GfxState* state) override;
149 
150 	//----- text drawing
151 	void  beginTextObject(GfxState* state) override;
152 	void  endTextObject(GfxState* state) override;
153 	void  drawChar(GfxState* state, double /*x*/, double /*y*/, double /*dx*/, double /*dy*/, double /*originX*/, double /*originY*/, CharCode /*code*/, int /*nBytes*/, POPPLER_CONST_082 Unicode* /*u*/, int /*uLen*/) override;
154 	GBool beginType3Char(GfxState* /*state*/, double /*x*/, double /*y*/, double /*dx*/, double /*dy*/, CharCode /*code*/, POPPLER_CONST_082 Unicode* /*u*/, int /*uLen*/) override;
155 	void  endType3Char(GfxState* /*state*/) override;
156 	void  type3D0(GfxState* /*state*/, double /*wx*/, double /*wy*/) override;
157 	void  type3D1(GfxState* /*state*/, double /*wx*/, double /*wy*/, double /*llx*/, double /*lly*/, double /*urx*/, double /*ury*/) override;
158 
159 private:
160 	void setFillAndStrokeForPDF(GfxState* state, PageItem* text_node);
161 	void updateTextPos(GfxState* state) override;
162 	void renderTextFrame();
163 	void finishItem(PageItem* item);
164 	PdfTextRecognition m_pdfTextRecognition = {};
165 };
166 #endif
167