1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9 #include "core/fpdfapi/parser/cpdf_array.h"
10 #include "core/fpdfapi/parser/cpdf_boolean.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfapi/parser/cpdf_number.h"
13 #include "core/fpdfapi/parser/cpdf_reference.h"
14 #include "core/fpdfapi/parser/cpdf_stream.h"
15 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
16 #include "core/fpdfapi/parser/cpdf_string.h"
17 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
18 #include "core/fxcrt/fx_extension.h"
19 #include "core/fxcrt/fx_stream.h"
20 #include "third_party/base/notreached.h"
21
// Character classification table, indexed by 8-bit character code.
// Each entry is one of:
//   'W' - whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
//   'N' - numeric: 0123456789+-.
//   'D' - delimiter: %()/<>[]{}
//   'R' - regular: everything else.
// Laid out as 16 rows of 16 entries; each row covers one high nibble.
const char PDF_CharType[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',

    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x20: SP ! " # $ % & ' ( ) * + , - . /
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',

    // 0x30: 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',

    // 0x40: @ A B C D E F G H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x50: P Q R S T U V W X Y Z [ backslash ] ^ _
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x60: ` a b c d e f g h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x70: p q r s t u v w x y z { | } ~ DEL
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x80: only 0x80 itself is classified as whitespace.
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xA0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xB0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xC0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xD0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xE0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0xF0: only 0xff is classified as whitespace.
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
73
GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream> & pFile)74 Optional<FX_FILESIZE> GetHeaderOffset(
75 const RetainPtr<IFX_SeekableReadStream>& pFile) {
76 static constexpr size_t kBufSize = 4;
77 uint8_t buf[kBufSize];
78 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
79 if (!pFile->ReadBlockAtOffset(buf, offset, kBufSize))
80 return {};
81
82 if (memcmp(buf, "%PDF", 4) == 0)
83 return offset;
84 }
85 return {};
86 }
87
GetDirectInteger(const CPDF_Dictionary * pDict,const ByteString & key)88 int32_t GetDirectInteger(const CPDF_Dictionary* pDict, const ByteString& key) {
89 const CPDF_Number* pObj = ToNumber(pDict->GetObjectFor(key));
90 return pObj ? pObj->GetInteger() : 0;
91 }
92
PDF_NameDecode(ByteStringView orig)93 ByteString PDF_NameDecode(ByteStringView orig) {
94 size_t src_size = orig.GetLength();
95 size_t out_index = 0;
96 ByteString result;
97 {
98 // Span's lifetime must end before ReleaseBuffer() below.
99 pdfium::span<char> pDest = result.GetBuffer(src_size);
100 for (size_t i = 0; i < src_size; i++) {
101 if (orig[i] == '#' && i + 2 < src_size) {
102 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
103 FXSYS_HexCharToInt(orig[i + 2]);
104 i += 2;
105 } else {
106 pDest[out_index++] = orig[i];
107 }
108 }
109 }
110 result.ReleaseBuffer(out_index);
111 return result;
112 }
113
// Returns |orig| with every byte that cannot appear literally escaped as '#'
// followed by two hex characters. A byte is escaped when it is >= 0x80, is
// whitespace per PDFCharIsWhitespace(), is a delimiter per
// PDFCharIsDelimiter(), or is '#' itself. When no byte needs escaping, |orig|
// is returned as-is without building a new buffer.
//
// All lengths are tracked as size_t: the source length comes from
// GetLength() and the buffer APIs take sizes, so using int would narrow the
// length and risk signed overflow while tallying the encoded size.
ByteString PDF_NameEncode(const ByteString& orig) {
  const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str());
  const size_t src_len = orig.GetLength();

  // Single definition of the escape rule, shared by both passes below.
  const auto needs_escape = [](uint8_t ch) {
    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
           PDFCharIsDelimiter(ch);
  };

  // First pass: compute the encoded length (3 bytes per escaped byte).
  size_t dest_len = 0;
  for (size_t i = 0; i < src_len; ++i)
    dest_len += needs_escape(src_buf[i]) ? 3 : 1;

  if (dest_len == src_len)
    return orig;

  // Second pass: emit the encoded bytes.
  ByteString res;
  size_t written = 0;
  {
    // Span's lifetime must end before ReleaseBuffer() below.
    pdfium::span<char> dest_buf = res.GetBuffer(dest_len);
    for (size_t i = 0; i < src_len; ++i) {
      const uint8_t ch = src_buf[i];
      if (needs_escape(ch)) {
        dest_buf[written++] = '#';
        FXSYS_IntToTwoHexChars(ch, &dest_buf[written]);
        written += 2;
      } else {
        dest_buf[written++] = ch;
      }
    }
  }
  res.ReleaseBuffer(written);
  return res;
}
151
ReadArrayElementsToVector(const CPDF_Array * pArray,size_t nCount)152 std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
153 size_t nCount) {
154 ASSERT(pArray);
155 ASSERT(pArray->size() >= nCount);
156 std::vector<float> ret(nCount);
157 for (size_t i = 0; i < nCount; ++i)
158 ret[i] = pArray->GetNumberAt(i);
159 return ret;
160 }
161
ValidateDictType(const CPDF_Dictionary * dict,const ByteString & type)162 bool ValidateDictType(const CPDF_Dictionary* dict, const ByteString& type) {
163 ASSERT(!type.IsEmpty());
164 return dict->GetNameFor("Type") == type;
165 }
166
ValidateDictAllResourcesOfType(const CPDF_Dictionary * dict,const ByteString & type)167 bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
168 const ByteString& type) {
169 if (!dict)
170 return false;
171
172 CPDF_DictionaryLocker locker(dict);
173 for (const auto& it : locker) {
174 const CPDF_Dictionary* entry = ToDictionary(it.second.Get()->GetDirect());
175 if (!entry || !ValidateDictType(entry, type))
176 return false;
177 }
178 return true;
179 }
180
ValidateFontResourceDict(const CPDF_Dictionary * dict)181 bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
182 return ValidateDictAllResourcesOfType(dict, "Font");
183 }
184
operator <<(std::ostream & buf,const CPDF_Object * pObj)185 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
186 if (!pObj) {
187 buf << " null";
188 return buf;
189 }
190 switch (pObj->GetType()) {
191 case CPDF_Object::kNullobj:
192 buf << " null";
193 break;
194 case CPDF_Object::kBoolean:
195 case CPDF_Object::kNumber:
196 buf << " " << pObj->GetString();
197 break;
198 case CPDF_Object::kString:
199 buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex());
200 break;
201 case CPDF_Object::kName: {
202 ByteString str = pObj->GetString();
203 buf << "/" << PDF_NameEncode(str);
204 break;
205 }
206 case CPDF_Object::kReference: {
207 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
208 break;
209 }
210 case CPDF_Object::kArray: {
211 const CPDF_Array* p = pObj->AsArray();
212 buf << "[";
213 for (size_t i = 0; i < p->size(); i++) {
214 const CPDF_Object* pElement = p->GetObjectAt(i);
215 if (pElement && !pElement->IsInline()) {
216 buf << " " << pElement->GetObjNum() << " 0 R";
217 } else {
218 buf << pElement;
219 }
220 }
221 buf << "]";
222 break;
223 }
224 case CPDF_Object::kDictionary: {
225 CPDF_DictionaryLocker locker(pObj->AsDictionary());
226 buf << "<<";
227 for (const auto& it : locker) {
228 const ByteString& key = it.first;
229 CPDF_Object* pValue = it.second.Get();
230 buf << "/" << PDF_NameEncode(key);
231 if (pValue && !pValue->IsInline()) {
232 buf << " " << pValue->GetObjNum() << " 0 R ";
233 } else {
234 buf << pValue;
235 }
236 }
237 buf << ">>";
238 break;
239 }
240 case CPDF_Object::kStream: {
241 const CPDF_Stream* p = pObj->AsStream();
242 buf << p->GetDict() << "stream\r\n";
243 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(p);
244 pAcc->LoadAllDataRaw();
245 buf.write(reinterpret_cast<const char*>(pAcc->GetData()),
246 pAcc->GetSize());
247 buf << "\r\nendstream";
248 break;
249 }
250 default:
251 NOTREACHED();
252 break;
253 }
254 return buf;
255 }
256