1 // --------------------------------------------------------------------
2 // ipeextract
3 // --------------------------------------------------------------------
4 /*
5
6 This file is part of the extensible drawing editor Ipe.
7 Copyright (c) 1993-2020 Otfried Cheong
8
9 Ipe is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 As a special exception, you have permission to link Ipe with the
15 CGAL library and distribute executables, as long as you follow the
16 requirements of the Gnu General Public License in regard to all of
17 the software in the executable aside from CGAL.
18
19 Ipe is distributed in the hope that it will be useful, but WITHOUT
20 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
22 License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with Ipe; if not, you can find it at
26 "http://www.gnu.org/copyleft/gpl.html", or write to the Free
27 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28
29 */
30
31 #include "ipexml.h"
32 #include "ipeutils.h"
33 #include "ipepdfparser.h"
34 #include <cstdlib>
35
36 using namespace ipe;
37
38 // ---------------------------------------------------------------------
39
//! File formats distinguished by fileFormat().
enum TFormat {
  EXml,     //!< plain Ipe XML
  EPdf,     //!< PDF (assumed to contain an Ipe stream)
  EEps,     //!< Postscript whose creator is "Ipelib" or "xpdf"
  EIpe5,    //!< Ipe 5 file
  EUnknown  //!< anything else
};
41
readLine(DataSource & source)42 String readLine(DataSource &source)
43 {
44 String s;
45 int ch = source.getChar();
46 while (ch != EOF && ch != '\n') {
47 s += char(ch);
48 ch = source.getChar();
49 }
50 return s;
51 }
52
53 //! Determine format of file in \a source.
fileFormat(DataSource & source)54 TFormat fileFormat(DataSource &source)
55 {
56 String s1 = readLine(source);
57 String s2 = readLine(source);
58 if (s1.substr(0, 5) == "<?xml" || s1.substr(0, 4) == "<ipe")
59 return EXml;
60 if (s1.substr(0, 4) == "%PDF")
61 return EPdf; // let's assume it contains an Ipe stream
62 if (s1.substr(0, 4) == "%!PS") {
63 if (s2.substr(0, 11) != "%%Creator: ")
64 return EUnknown;
65 if (s2.substr(11, 6) == "Ipelib" || s2.substr(11, 4) == "xpdf")
66 return EEps;
67 if (s2.substr(11, 3) == "Ipe")
68 return EIpe5;
69 return EUnknown;
70 }
71 if (s1.substr(0, 5) == "%\\Ipe" || s1.substr(0, 6) == "%\\MIPE")
72 return EIpe5;
73 return EUnknown;
74 }
75
76 // --------------------------------------------------------------------
77
78 class StreamParser : public XmlParser {
79 public:
StreamParser(DataSource & source,std::FILE * out)80 explicit StreamParser(DataSource &source, std::FILE *out)
81 : XmlParser(source), iOut(out) { /* nothing */ }
82 bool parse();
83 virtual Buffer image(int objNum) = 0;
84 void writeAttributes(const XmlAttributes &attr);
85 bool parseBitmap();
86 private:
87 std::FILE *iOut;
88 };
89
parse()90 bool StreamParser::parse()
91 {
92 while (!eos()) {
93 bool lt = (iCh == '<');
94 fputc(iCh, iOut);
95 getChar();
96 // look out for <bitmap> tag
97 if (lt && iCh == 'b') {
98 String tag;
99 while (isTagChar(iCh)) {
100 tag += char(iCh);
101 fputc(iCh, iOut);
102 getChar();
103 }
104 // at char after tag
105 if (tag == "bitmap" && !parseBitmap())
106 return false;
107 }
108 }
109 return true;
110 }
111
112 // write out attributes, but drop 'pdfObject'
writeAttributes(const XmlAttributes & attr)113 void StreamParser::writeAttributes(const XmlAttributes &attr)
114 {
115 for (XmlAttributes::const_iterator it = attr.begin();
116 it != attr.end(); ++it)
117 if (it->first != "pdfObject")
118 fprintf(iOut, " %s=\"%s\"", it->first.z(), it->second.z());
119 fprintf(iOut, ">\n");
120 }
121
writeBits(FILE * out,Buffer bits)122 static void writeBits(FILE *out, Buffer bits)
123 {
124 const char *data = bits.data();
125 const char *fin = data + bits.size();
126 int col = 0;
127 while (data != fin) {
128 fprintf(out, "%02x", (*data++ & 0xff));
129 if (++col == 36) {
130 fputc('\n', out);
131 col = 0;
132 }
133 }
134 if (col > 0)
135 fputc('\n', out);
136 }
137
parseBitmap()138 bool StreamParser::parseBitmap()
139 {
140 XmlAttributes attr;
141 if (!parseAttributes(attr))
142 return false;
143 String objNumStr;
144 if (attr.slash() && attr.has("pdfObject", objNumStr)) {
145 Lex lex(objNumStr);
146 Buffer bits = image(lex.getInt());
147 Buffer alpha;
148 lex.skipWhitespace();
149 if (!lex.eos()) {
150 alpha = image(lex.getInt());
151 fprintf(iOut, " alphaLength=\"%d\"", alpha.size());
152 }
153 fprintf(iOut, " length=\"%d\"", bits.size());
154 writeAttributes(attr);
155 writeBits(iOut, bits);
156 if (alpha.size() > 0)
157 writeBits(iOut, alpha);
158 fprintf(iOut, "</bitmap>\n");
159 } else {
160 // just write out attributes
161 writeAttributes(attr);
162 }
163 return true;
164 }
165
166 // --------------------------------------------------------------------
167
168 class StreamParserPdf : public StreamParser {
169 public:
StreamParserPdf(PdfFile & loader,DataSource & source,std::FILE * out)170 explicit StreamParserPdf(PdfFile &loader, DataSource &source,
171 std::FILE *out)
172 : StreamParser(source, out), iLoader(loader) { /* nothing */ }
173 virtual Buffer image(int objNum);
174 private:
175 PdfFile &iLoader;
176 };
177
image(int objNum)178 Buffer StreamParserPdf::image(int objNum)
179 {
180 const PdfObj *obj = iLoader.object(objNum);
181 if (!obj || !obj->dict() || obj->dict()->stream().size() == 0)
182 return Buffer();
183 return obj->dict()->stream();
184 }
185
186 // --------------------------------------------------------------------
187
188 class PsSource : public DataSource {
189 public:
PsSource(DataSource & source)190 PsSource(DataSource &source) : iSource(source) { /* nothing */ }
191 bool skipToXml();
192 String readLine();
193 Buffer image(int index) const;
194 int getNext() const;
deflated() const195 inline bool deflated() const { return iDeflated; }
196
197 virtual int getChar();
198 private:
199 DataSource &iSource;
200 std::vector<Buffer> iImages;
201 bool iEos;
202 bool iDeflated;
203 };
204
getChar()205 int PsSource::getChar()
206 {
207 int ch = iSource.getChar();
208 if (ch == '\n')
209 iSource.getChar(); // remove '%'
210 return ch;
211 }
212
readLine()213 String PsSource::readLine()
214 {
215 String s;
216 int ch = iSource.getChar();
217 while (ch != EOF && ch != '\n') {
218 s += char(ch);
219 ch = iSource.getChar();
220 }
221 iEos = (ch == EOF);
222 return s;
223 }
224
image(int index) const225 Buffer PsSource::image(int index) const
226 {
227 if (1 <= index && index <= int(iImages.size()))
228 return iImages[index - 1];
229 else
230 return Buffer();
231 }
232
skipToXml()233 bool PsSource::skipToXml()
234 {
235 iDeflated = false;
236
237 String s1 = readLine();
238 String s2 = readLine();
239
240 if (s1.substr(0, 11) != "%!PS-Adobe-" ||
241 s2.substr(0, 11) != "%%Creator: ")
242 return false;
243
244 if (s2.substr(11, 6) == "Ipelib") {
245 // the 'modern' file format of Ipe 6.0 preview 17 and later
246 do {
247 s1 = readLine();
248 if (s1.substr(0, 17) == "%%BeginIpeImage: ") {
249 Lex lex(s1.substr(17));
250 int num, len;
251 lex >> num >> len;
252 if (num != int(iImages.size() + 1))
253 return false;
254 (void) readLine(); // skip 'image'
255 Buffer buf(len);
256 A85Source a85(iSource);
257 char *p = buf.data();
258 char *p1 = p + buf.size();
259 while (p < p1) {
260 int ch = a85.getChar();
261 if (ch == EOF)
262 return false;
263 *p++ = char(ch);
264 }
265 iImages.push_back(buf);
266 }
267 } while (!iEos && s1.substr(0, 13) != "%%BeginIpeXml");
268
269 iDeflated = (s1.substr(13, 14) == ": /FlateDecode");
270
271 } else {
272 // the 'old' file format generated through pdftops
273 do {
274 s1 = readLine();
275 } while (!iEos && s1.substr(0, 10) != "%%EndSetup");
276 }
277 if (iEos)
278 return false;
279 (void) iSource.getChar(); // skip '%' before <ipe>
280 return true;
281 }
282
283 // --------------------------------------------------------------------
284
285 class StreamParserPs : public StreamParser {
286 public:
StreamParserPs(PsSource & loader,DataSource & source,std::FILE * out)287 explicit StreamParserPs(PsSource &loader, DataSource &source,
288 std::FILE *out)
289 : StreamParser(source, out), iLoader(loader) { /* nothing */ }
290 virtual Buffer image(int objNum);
291 private:
292 PsSource &iLoader;
293 };
294
image(int objNum)295 Buffer StreamParserPs::image(int objNum)
296 {
297 return iLoader.image(objNum);
298 }
299
300 // --------------------------------------------------------------------
301
extractPs(DataSource & source,std::FILE * out)302 static bool extractPs(DataSource &source, std::FILE *out)
303 {
304 PsSource psSource(source);
305 if (!psSource.skipToXml()) {
306 fprintf(stderr, "Could not find XML stream.\n");
307 return false;
308 }
309
310 if (psSource.deflated()) {
311 A85Source a85(psSource);
312 InflateSource source(a85);
313 StreamParserPs parser(psSource, source, out);
314 return parser.parse();
315 } else {
316 StreamParserPs parser(psSource, psSource, out);
317 return parser.parse();
318 }
319 return false;
320 }
321
extractPdf(DataSource & source,std::FILE * out)322 static bool extractPdf(DataSource &source, std::FILE *out)
323 {
324 PdfFile loader;
325 if (!loader.parse(source)) {
326 fprintf(stderr, "Error parsing PDF file - probably not an Ipe file.\n");
327 return false;
328 }
329
330 // try ancient format version first (early previews of Ipe 6.0)
331 const PdfObj *obj = loader.catalog()->get("Ipe", &loader);
332
333 // otherwise try most recent format (>= 7.2.11)
334 if (!obj) {
335 obj = loader.catalog()->get("PieceInfo", &loader);
336 if (obj && obj->dict()) {
337 obj = obj->dict()->get("Ipe", &loader);
338 if (obj && obj->dict())
339 obj = obj->dict()->get("Private", &loader);
340 }
341 }
342
343 if (!obj)
344 obj = loader.object(1);
345
346 if (!obj || !obj->dict()) {
347 fprintf(stderr, "Input file does not contain an Ipe XML stream.\n");
348 return false;
349 }
350
351 const PdfObj *type = obj->dict()->get("Type");
352 if (!type || !type->name() || type->name()->value() != "Ipe") {
353 fprintf(stderr, "Input file does not contain an Ipe XML stream.\n");
354 return false;
355 }
356
357 Buffer buffer = obj->dict()->stream();
358 BufferSource xml(buffer);
359
360 if (obj->dict()->deflated()) {
361 InflateSource xml1(xml);
362 StreamParserPdf parser(loader, xml1, out);
363 return parser.parse();
364 } else {
365 StreamParserPdf parser(loader, xml, out);
366 return parser.parse();
367 }
368 }
369
370 // --------------------------------------------------------------------
371
//! Print a usage message to stderr and terminate with exit status 1.
static void usage()
{
  static const char message[] =
    "Usage: ipeextract ( <input.pdf> | <input.eps> ) [<output.xml>]\n"
    "Ipeextract extracts the XML stream from a PDF or Postscript file\n"
    "generated by any version of Ipe 6 or Ipe 7.\n";
  fputs(message, stderr);
  exit(1);
}
381
main(int argc,char * argv[])382 int main(int argc, char *argv[])
383 {
384 Platform::initLib(IPELIB_VERSION);
385
386 // ensure one or two arguments
387 if (argc != 2 && argc != 3)
388 usage();
389
390 const char *src = argv[1];
391 String dst;
392
393 if (argc == 3) {
394 dst = argv[2];
395 } else {
396 String s = src;
397 if (s.right(4) == ".pdf" || s.right(4) == ".eps")
398 dst = s.left(s.size() - 3) + "xml";
399 else
400 dst = s + ".xml";
401 }
402
403 std::FILE *fd = Platform::fopen(src, "rb");
404 if (!fd) {
405 std::fprintf(stderr, "Could not open '%s'\n", src);
406 exit(1);
407 }
408 FileSource source(fd);
409 TFormat format = fileFormat(source);
410 if (format == EXml) {
411 fprintf(stderr, "Input file is already in XML format.\n");
412 } else if (format == EIpe5) {
413 fprintf(stderr, "Input file is in Ipe5 format.\n"
414 "Run 'ipe5toxml' to convert it to XML format.\n");
415 } else {
416 std::rewind(fd);
417 std::FILE *out = Platform::fopen(dst.z(), "wb");
418 if (!out) {
419 fprintf(stderr, "Could not open '%s' for writing.\n", dst.z());
420 } else {
421 bool res = (format == EPdf) ?
422 extractPdf(source, out) : extractPs(source, out);
423 if (!res)
424 fprintf(stderr, "Error during extraction of XML stream.\n");
425 std::fclose(out);
426 }
427 }
428 std::fclose(fd);
429 return 0;
430 }
431
432 // --------------------------------------------------------------------
433