1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/ctypeset.h"
8 
9 #include <algorithm>
10 
11 #include "core/fpdfdoc/cline.h"
12 #include "core/fpdfdoc/cpvt_wordinfo.h"
13 #include "core/fpdfdoc/csection.h"
14 
namespace {

// Classification flags for code points 0x00-0x7F, indexed by code point.
// Bits read by the helpers in this namespace:
//   0x01 - Latin letter               (IsLatin)
//   0x04 - opening-style punctuation  (IsOpenStylePunctuation)
//   0x08 - punctuation                (IsPunctuation)
//   0x20 - connective symbol          (IsConnectiveSymbol)
// Bits 0x02 (set on '0'-'9') and 0x10 (set on '$') are present in the table
// but are not read by any helper in this namespace.
const uint8_t special_chars[128] = {
    0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
    0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08,
    0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
};

// Returns true if |word| is treated as a Latin letter: either an ASCII code
// point flagged 0x01 in |special_chars|, or a code point inside one of the
// hard-coded ranges below.
bool IsLatin(uint16_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & 0x01);

  return ((word >= 0x00C0 && word <= 0x00FF) ||
          (word >= 0x0100 && word <= 0x024F) ||
          (word >= 0x1E00 && word <= 0x1EFF) ||
          (word >= 0x2C60 && word <= 0x2C7F) ||
          (word >= 0xA720 && word <= 0xA7FF) ||
          (word >= 0xFF21 && word <= 0xFF3A) ||
          (word >= 0xFF41 && word <= 0xFF5A));
}

// Returns true if |word| is one of the ASCII digits '0'-'9'.
bool IsDigit(uint32_t word) {
  return word >= 0x0030 && word <= 0x0039;
}

// Returns true if |word| falls in one of the code point ranges this file
// treats as CJK. Inside 0x3000-0x303F only the explicitly listed code points
// qualify; the rest of that range is handled by IsPunctuation() or is
// unclassified.
bool IsCJK(uint32_t word) {
  if ((word >= 0x1100 && word <= 0x11FF) ||
      (word >= 0x2E80 && word <= 0x2FFF) ||
      (word >= 0x3040 && word <= 0x9FBF) ||
      (word >= 0xAC00 && word <= 0xD7AF) ||
      (word >= 0xF900 && word <= 0xFAFF) ||
      (word >= 0xFE30 && word <= 0xFE4F) ||
      (word >= 0x20000 && word <= 0x2A6DF) ||
      (word >= 0x2F800 && word <= 0x2FA1F)) {
    return true;
  }
  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 ||
        word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 ||
        word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 ||
        word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035);
  }
  return word >= 0xFF66 && word <= 0xFF9D;
}

// Returns true if |word| is treated as punctuation. ASCII code points are
// looked up in |special_chars| (flag 0x08); every other block is matched
// against an explicit list of code points.
bool IsPunctuation(uint32_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & 0x08);

  if (word >= 0x0080 && word <= 0x00FF) {
    // 0x0094 must be matched exactly like its neighbours: a "<=" test here
    // would classify every code point from 0x0080 through 0x0094 as
    // punctuation, including 0x0080, which IsCurrencySymbol() claims.
    return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
            word == 0x0091 || word == 0x0092 || word == 0x0093 ||
            word == 0x0094 || word == 0x0096 || word == 0x00B4 ||
            word == 0x00B8);
  }

  if (word >= 0x2000 && word <= 0x206F) {
    return (
        word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
        word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
        word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
        word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
        word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
        word == 0x203E || word == 0x2044);
  }

  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
        word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
        word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
        word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
        word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
        word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
  }

  if (word >= 0xFE50 && word <= 0xFE6F)
    return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;

  if (word >= 0xFF00 && word <= 0xFFEF) {
    return (
        word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
        word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
        word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
        word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
        word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
        word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
  }

  return false;
}

// Returns true if |word| is an ASCII code point flagged 0x20 in
// |special_chars|. Code points above 0x7F are never connective.
bool IsConnectiveSymbol(uint32_t word) {
  return word <= 0x007F && (special_chars[word] & 0x20);
}

// Returns true if |word| is opening-style punctuation: an ASCII code point
// flagged 0x04 in |special_chars|, or one of the listed non-ASCII code
// points.
bool IsOpenStylePunctuation(uint32_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & 0x04);

  return (word == 0x300A || word == 0x300C || word == 0x300E ||
          word == 0x3010 || word == 0x3014 || word == 0x3016 ||
          word == 0x3018 || word == 0x301A || word == 0xFF08 ||
          word == 0xFF3B || word == 0xFF5B || word == 0xFF62);
}

// Returns true if |word| is one of the code points this file treats as a
// currency symbol (the listed single code points plus 0x20A0-0x20CF).
bool IsCurrencySymbol(uint16_t word) {
  return (word == 0x0024 || word == 0x0080 || word == 0x00A2 ||
          word == 0x00A3 || word == 0x00A4 || word == 0x00A5 ||
          (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 ||
          word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 ||
          word == 0xFFE5 || word == 0xFFE6);
}

// Returns true if |word| is a currency symbol or 0x2116.
bool IsPrefixSymbol(uint16_t word) {
  return IsCurrencySymbol(word) || word == 0x2116;
}

// Returns true if |word| is 0x0020 or 0x3000.
bool IsSpace(uint16_t word) {
  return word == 0x0020 || word == 0x3000;
}

// Decides whether |curWord| must be divided from the preceding |prevWord|.
// The rules are evaluated in order and the first one that matches wins:
//   1. letter/digit followed by letter/digit      -> no division
//   2. current is a space or punctuation          -> no division
//   3. either one is a connective symbol          -> no division
//   4. previous is a space or punctuation         -> division
//   5. previous is a prefix symbol                -> no division
//   6. current is a prefix symbol or CJK          -> division
//   7. previous is CJK                            -> division
//   otherwise                                     -> no division
bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
  if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
      (IsLatin(curWord) || IsDigit(curWord))) {
    return false;
  }
  if (IsSpace(curWord) || IsPunctuation(curWord)) {
    return false;
  }
  if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
    return false;
  }
  if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
    return true;
  }
  if (IsPrefixSymbol(prevWord)) {
    return false;
  }
  if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
    return true;
  }
  if (IsCJK(prevWord)) {
    return true;
  }
  return false;
}

}  // namespace
173 
CTypeset(CSection * pSection)174 CTypeset::CTypeset(CSection* pSection)
175     : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
176       m_pVT(pSection->m_pVT),
177       m_pSection(pSection) {}
178 
~CTypeset()179 CTypeset::~CTypeset() {}
180 
CharArray()181 CPVT_FloatRect CTypeset::CharArray() {
182   ASSERT(m_pSection);
183   FX_FLOAT fLineAscent =
184       m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
185   FX_FLOAT fLineDescent =
186       m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
187   m_rcRet.Default();
188   FX_FLOAT x = 0.0f, y = 0.0f;
189   FX_FLOAT fNextWidth;
190   int32_t nStart = 0;
191   FX_FLOAT fNodeWidth = m_pVT->GetPlateWidth() /
192                         (m_pVT->m_nCharArray <= 0 ? 1 : m_pVT->m_nCharArray);
193   if (CLine* pLine = m_pSection->m_LineArray.GetAt(0)) {
194     x = 0.0f;
195     y += m_pVT->GetLineLeading(m_pSection->m_SecInfo);
196     y += fLineAscent;
197     nStart = 0;
198     switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
199       case 0:
200         pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
201         break;
202       case 1:
203         nStart = (m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize()) / 2;
204         pLine->m_LineInfo.fLineX =
205             fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
206         break;
207       case 2:
208         nStart = m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize();
209         pLine->m_LineInfo.fLineX =
210             fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
211         break;
212     }
213     for (int32_t w = 0, sz = m_pSection->m_WordArray.GetSize(); w < sz; w++) {
214       if (w >= m_pVT->m_nCharArray) {
215         break;
216       }
217       fNextWidth = 0;
218       if (CPVT_WordInfo* pNextWord = m_pSection->m_WordArray.GetAt(w + 1)) {
219         pNextWord->fWordTail = 0;
220         fNextWidth = m_pVT->GetWordWidth(*pNextWord);
221       }
222       if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) {
223         pWord->fWordTail = 0;
224         FX_FLOAT fWordWidth = m_pVT->GetWordWidth(*pWord);
225         FX_FLOAT fWordAscent = m_pVT->GetWordAscent(*pWord);
226         FX_FLOAT fWordDescent = m_pVT->GetWordDescent(*pWord);
227         x = (FX_FLOAT)(fNodeWidth * (w + nStart + 0.5) -
228                        fWordWidth * VARIABLETEXT_HALF);
229         pWord->fWordX = x;
230         pWord->fWordY = y;
231         if (w == 0) {
232           pLine->m_LineInfo.fLineX = x;
233         }
234         if (w != m_pSection->m_WordArray.GetSize() - 1) {
235           pWord->fWordTail =
236               (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
237                    ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
238                    : 0);
239         } else {
240           pWord->fWordTail = 0;
241         }
242         x += fWordWidth;
243         fLineAscent = std::max(fLineAscent, fWordAscent);
244         fLineDescent = std::min(fLineDescent, fWordDescent);
245       }
246     }
247     pLine->m_LineInfo.nBeginWordIndex = 0;
248     pLine->m_LineInfo.nEndWordIndex = m_pSection->m_WordArray.GetSize() - 1;
249     pLine->m_LineInfo.fLineY = y;
250     pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
251     pLine->m_LineInfo.fLineAscent = fLineAscent;
252     pLine->m_LineInfo.fLineDescent = fLineDescent;
253     y -= fLineDescent;
254   }
255   return m_rcRet = CPVT_FloatRect(0, 0, x, y);
256 }
257 
GetEditSize(FX_FLOAT fFontSize)258 CFX_SizeF CTypeset::GetEditSize(FX_FLOAT fFontSize) {
259   ASSERT(m_pSection);
260   ASSERT(m_pVT);
261   SplitLines(false, fFontSize);
262   return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
263 }
264 
Typeset()265 CPVT_FloatRect CTypeset::Typeset() {
266   ASSERT(m_pVT);
267   m_pSection->m_LineArray.Empty();
268   SplitLines(true, 0.0f);
269   m_pSection->m_LineArray.Clear();
270   OutputLines();
271   return m_rcRet;
272 }
273 
SplitLines(bool bTypeset,FX_FLOAT fFontSize)274 void CTypeset::SplitLines(bool bTypeset, FX_FLOAT fFontSize) {
275   ASSERT(m_pVT);
276   ASSERT(m_pSection);
277   int32_t nLineHead = 0;
278   int32_t nLineTail = 0;
279   FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f;
280   FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
281   FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
282   FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
283   int32_t nWordStartPos = 0;
284   bool bFullWord = false;
285   int32_t nLineFullWordIndex = 0;
286   int32_t nCharIndex = 0;
287   CPVT_LineInfo line;
288   FX_FLOAT fWordWidth = 0;
289   FX_FLOAT fTypesetWidth = std::max(
290       m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(m_pSection->m_SecInfo),
291       0.0f);
292   int32_t nTotalWords = m_pSection->m_WordArray.GetSize();
293   bool bOpened = false;
294   if (nTotalWords > 0) {
295     int32_t i = 0;
296     while (i < nTotalWords) {
297       CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(i);
298       CPVT_WordInfo* pOldWord = pWord;
299       if (i > 0) {
300         pOldWord = m_pSection->m_WordArray.GetAt(i - 1);
301       }
302       if (pWord) {
303         if (bTypeset) {
304           fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
305           fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
306           fWordWidth = m_pVT->GetWordWidth(*pWord);
307         } else {
308           fLineAscent =
309               std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
310           fLineDescent =
311               std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
312           fWordWidth = m_pVT->GetWordWidth(
313               pWord->nFontIndex, pWord->Word, m_pVT->m_wSubWord,
314               m_pVT->m_fCharSpace, m_pVT->m_nHorzScale, fFontSize,
315               pWord->fWordTail);
316         }
317         if (!bOpened) {
318           if (IsOpenStylePunctuation(pWord->Word)) {
319             bOpened = true;
320             bFullWord = true;
321           } else if (pOldWord) {
322             if (NeedDivision(pOldWord->Word, pWord->Word)) {
323               bFullWord = true;
324             }
325           }
326         } else {
327           if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
328             bOpened = false;
329           }
330         }
331         if (bFullWord) {
332           bFullWord = false;
333           if (nCharIndex > 0) {
334             nLineFullWordIndex++;
335           }
336           nWordStartPos = i;
337           fBackupLineWidth = fLineWidth;
338           fBackupLineAscent = fLineAscent;
339           fBackupLineDescent = fLineDescent;
340         }
341         nCharIndex++;
342       }
343       if (m_pVT->m_bLimitWidth && fTypesetWidth > 0 &&
344           fLineWidth + fWordWidth > fTypesetWidth) {
345         if (nLineFullWordIndex > 0) {
346           i = nWordStartPos;
347           fLineWidth = fBackupLineWidth;
348           fLineAscent = fBackupLineAscent;
349           fLineDescent = fBackupLineDescent;
350         }
351         if (nCharIndex == 1) {
352           fLineWidth = fWordWidth;
353           i++;
354         }
355         nLineTail = i - 1;
356         if (bTypeset) {
357           line.nBeginWordIndex = nLineHead;
358           line.nEndWordIndex = nLineTail;
359           line.nTotalWord = nLineTail - nLineHead + 1;
360           line.fLineWidth = fLineWidth;
361           line.fLineAscent = fLineAscent;
362           line.fLineDescent = fLineDescent;
363           m_pSection->AddLine(line);
364         }
365         fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo));
366         fMaxY -= fLineDescent;
367         fMaxX = std::max(fLineWidth, fMaxX);
368         nLineHead = i;
369         fLineWidth = 0.0f;
370         fLineAscent = 0.0f;
371         fLineDescent = 0.0f;
372         nCharIndex = 0;
373         nLineFullWordIndex = 0;
374         bFullWord = false;
375       } else {
376         fLineWidth += fWordWidth;
377         i++;
378       }
379     }
380     if (nLineHead <= nTotalWords - 1) {
381       nLineTail = nTotalWords - 1;
382       if (bTypeset) {
383         line.nBeginWordIndex = nLineHead;
384         line.nEndWordIndex = nLineTail;
385         line.nTotalWord = nLineTail - nLineHead + 1;
386         line.fLineWidth = fLineWidth;
387         line.fLineAscent = fLineAscent;
388         line.fLineDescent = fLineDescent;
389         m_pSection->AddLine(line);
390       }
391       fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo));
392       fMaxY -= fLineDescent;
393       fMaxX = std::max(fLineWidth, fMaxX);
394     }
395   } else {
396     if (bTypeset) {
397       fLineAscent = m_pVT->GetLineAscent(m_pSection->m_SecInfo);
398       fLineDescent = m_pVT->GetLineDescent(m_pSection->m_SecInfo);
399     } else {
400       fLineAscent =
401           m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
402       fLineDescent =
403           m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
404     }
405     if (bTypeset) {
406       line.nBeginWordIndex = -1;
407       line.nEndWordIndex = -1;
408       line.nTotalWord = 0;
409       line.fLineWidth = 0;
410       line.fLineAscent = fLineAscent;
411       line.fLineDescent = fLineDescent;
412       m_pSection->AddLine(line);
413     }
414     fMaxY += m_pVT->GetLineLeading(m_pSection->m_SecInfo) + fLineAscent -
415              fLineDescent;
416   }
417   m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
418 }
419 
OutputLines()420 void CTypeset::OutputLines() {
421   ASSERT(m_pVT);
422   ASSERT(m_pSection);
423   FX_FLOAT fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
424   FX_FLOAT fPosX = 0.0f, fPosY = 0.0f;
425   FX_FLOAT fLineIndent = m_pVT->GetLineIndent(m_pSection->m_SecInfo);
426   FX_FLOAT fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
427   switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
428     default:
429     case 0:
430       fMinX = 0.0f;
431       break;
432     case 1:
433       fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
434       break;
435     case 2:
436       fMinX = fTypesetWidth - m_rcRet.Width();
437       break;
438   }
439   fMaxX = fMinX + m_rcRet.Width();
440   fMinY = 0.0f;
441   fMaxY = m_rcRet.Height();
442   int32_t nTotalLines = m_pSection->m_LineArray.GetSize();
443   if (nTotalLines > 0) {
444     m_pSection->m_SecInfo.nTotalLine = nTotalLines;
445     for (int32_t l = 0; l < nTotalLines; l++) {
446       if (CLine* pLine = m_pSection->m_LineArray.GetAt(l)) {
447         switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
448           default:
449           case 0:
450             fPosX = 0;
451             break;
452           case 1:
453             fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
454                     VARIABLETEXT_HALF;
455             break;
456           case 2:
457             fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
458             break;
459         }
460         fPosX += fLineIndent;
461         fPosY += m_pVT->GetLineLeading(m_pSection->m_SecInfo);
462         fPosY += pLine->m_LineInfo.fLineAscent;
463         pLine->m_LineInfo.fLineX = fPosX - fMinX;
464         pLine->m_LineInfo.fLineY = fPosY - fMinY;
465         for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
466              w <= pLine->m_LineInfo.nEndWordIndex; w++) {
467           if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) {
468             pWord->fWordX = fPosX - fMinX;
469             if (pWord->pWordProps) {
470               switch (pWord->pWordProps->nScriptType) {
471                 default:
472                 case CPDF_VariableText::ScriptType::Normal:
473                   pWord->fWordY = fPosY - fMinY;
474                   break;
475                 case CPDF_VariableText::ScriptType::Super:
476                   pWord->fWordY = fPosY - m_pVT->GetWordAscent(*pWord) - fMinY;
477                   break;
478                 case CPDF_VariableText::ScriptType::Sub:
479                   pWord->fWordY = fPosY - m_pVT->GetWordDescent(*pWord) - fMinY;
480                   break;
481               }
482             } else {
483               pWord->fWordY = fPosY - fMinY;
484             }
485             fPosX += m_pVT->GetWordWidth(*pWord);
486           }
487         }
488         fPosY -= pLine->m_LineInfo.fLineDescent;
489       }
490     }
491   }
492   m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
493 }
494