1 // --------------------------------------------------------------------
2 // ipeextract
3 // --------------------------------------------------------------------
4 /*
5 
6     This file is part of the extensible drawing editor Ipe.
7     Copyright (c) 1993-2020 Otfried Cheong
8 
9     Ipe is free software; you can redistribute it and/or modify it
10     under the terms of the GNU General Public License as published by
11     the Free Software Foundation; either version 3 of the License, or
12     (at your option) any later version.
13 
14     As a special exception, you have permission to link Ipe with the
15     CGAL library and distribute executables, as long as you follow the
16     requirements of the Gnu General Public License in regard to all of
17     the software in the executable aside from CGAL.
18 
19     Ipe is distributed in the hope that it will be useful, but WITHOUT
20     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21     or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
22     License for more details.
23 
24     You should have received a copy of the GNU General Public License
25     along with Ipe; if not, you can find it at
26     "http://www.gnu.org/copyleft/gpl.html", or write to the Free
27     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 
29 */
30 
31 #include "ipexml.h"
32 #include "ipeutils.h"
33 #include "ipepdfparser.h"
34 #include <cstdlib>
35 
36 using namespace ipe;
37 
38 // ---------------------------------------------------------------------
39 
//! File formats distinguished by fileFormat().
enum TFormat {
  EXml,     //!< first line starts with "<?xml" or "<ipe"
  EPdf,     //!< first line starts with "%PDF"
  EEps,     //!< Postscript whose creator line names "Ipelib" or "xpdf"
  EIpe5,    //!< Ipe 5 file (see fileFormat() for the markers tested)
  EUnknown  //!< none of the above
};
41 
readLine(DataSource & source)42 String readLine(DataSource &source)
43 {
44   String s;
45   int ch = source.getChar();
46   while (ch != EOF && ch != '\n') {
47     s += char(ch);
48     ch = source.getChar();
49   }
50   return s;
51 }
52 
53 //! Determine format of file in \a source.
fileFormat(DataSource & source)54 TFormat fileFormat(DataSource &source)
55 {
56   String s1 = readLine(source);
57   String s2 = readLine(source);
58   if (s1.substr(0, 5) == "<?xml" || s1.substr(0, 4) == "<ipe")
59     return EXml;
60   if (s1.substr(0, 4) == "%PDF")
61     return EPdf;  // let's assume it contains an Ipe stream
62   if (s1.substr(0, 4) == "%!PS") {
63     if (s2.substr(0, 11) != "%%Creator: ")
64       return EUnknown;
65     if (s2.substr(11, 6) == "Ipelib" || s2.substr(11, 4) == "xpdf")
66       return EEps;
67     if (s2.substr(11, 3) == "Ipe")
68       return EIpe5;
69     return EUnknown;
70   }
71   if (s1.substr(0, 5) == "%\\Ipe" || s1.substr(0, 6) == "%\\MIPE")
72     return EIpe5;
73   return EUnknown;
74 }
75 
76 // --------------------------------------------------------------------
77 
78 class StreamParser : public XmlParser {
79 public:
StreamParser(DataSource & source,std::FILE * out)80   explicit StreamParser(DataSource &source, std::FILE *out)
81     : XmlParser(source), iOut(out) { /* nothing */ }
82   bool parse();
83   virtual Buffer image(int objNum) = 0;
84   void writeAttributes(const XmlAttributes &attr);
85   bool parseBitmap();
86 private:
87   std::FILE *iOut;
88 };
89 
parse()90 bool StreamParser::parse()
91 {
92   while (!eos()) {
93     bool lt = (iCh == '<');
94     fputc(iCh, iOut);
95     getChar();
96     // look out for <bitmap> tag
97     if (lt && iCh == 'b') {
98       String tag;
99       while (isTagChar(iCh)) {
100 	tag += char(iCh);
101 	fputc(iCh, iOut);
102 	getChar();
103       }
104       // at char after tag
105       if (tag == "bitmap" && !parseBitmap())
106 	return false;
107     }
108   }
109   return true;
110 }
111 
112 // write out attributes, but drop 'pdfObject'
writeAttributes(const XmlAttributes & attr)113 void StreamParser::writeAttributes(const XmlAttributes &attr)
114 {
115   for (XmlAttributes::const_iterator it = attr.begin();
116        it != attr.end(); ++it)
117     if (it->first != "pdfObject")
118       fprintf(iOut, " %s=\"%s\"", it->first.z(), it->second.z());
119   fprintf(iOut, ">\n");
120 }
121 
writeBits(FILE * out,Buffer bits)122 static void writeBits(FILE *out, Buffer bits)
123 {
124   const char *data = bits.data();
125   const char *fin = data + bits.size();
126   int col = 0;
127   while (data != fin) {
128     fprintf(out, "%02x", (*data++ & 0xff));
129     if (++col == 36) {
130       fputc('\n', out);
131       col = 0;
132     }
133   }
134   if (col > 0)
135     fputc('\n', out);
136 }
137 
parseBitmap()138 bool StreamParser::parseBitmap()
139 {
140   XmlAttributes attr;
141   if (!parseAttributes(attr))
142     return false;
143   String objNumStr;
144   if (attr.slash() && attr.has("pdfObject", objNumStr)) {
145     Lex lex(objNumStr);
146     Buffer bits = image(lex.getInt());
147     Buffer alpha;
148     lex.skipWhitespace();
149     if (!lex.eos()) {
150       alpha = image(lex.getInt());
151       fprintf(iOut, " alphaLength=\"%d\"", alpha.size());
152     }
153     fprintf(iOut, " length=\"%d\"", bits.size());
154     writeAttributes(attr);
155     writeBits(iOut, bits);
156     if (alpha.size() > 0)
157       writeBits(iOut, alpha);
158     fprintf(iOut, "</bitmap>\n");
159   } else {
160     // just write out attributes
161     writeAttributes(attr);
162   }
163   return true;
164 }
165 
166 // --------------------------------------------------------------------
167 
168 class StreamParserPdf : public StreamParser {
169 public:
StreamParserPdf(PdfFile & loader,DataSource & source,std::FILE * out)170   explicit StreamParserPdf(PdfFile &loader, DataSource &source,
171 			   std::FILE *out)
172     : StreamParser(source, out), iLoader(loader) { /* nothing */ }
173   virtual Buffer image(int objNum);
174 private:
175   PdfFile &iLoader;
176 };
177 
image(int objNum)178 Buffer StreamParserPdf::image(int objNum)
179 {
180   const PdfObj *obj = iLoader.object(objNum);
181   if (!obj || !obj->dict() || obj->dict()->stream().size() == 0)
182     return Buffer();
183   return obj->dict()->stream();
184 }
185 
186 // --------------------------------------------------------------------
187 
188 class PsSource : public DataSource {
189 public:
PsSource(DataSource & source)190   PsSource(DataSource &source) : iSource(source) { /* nothing */ }
191   bool skipToXml();
192   String readLine();
193   Buffer image(int index) const;
194   int getNext() const;
deflated() const195   inline bool deflated() const { return iDeflated; }
196 
197   virtual int getChar();
198 private:
199   DataSource &iSource;
200   std::vector<Buffer> iImages;
201   bool iEos;
202   bool iDeflated;
203 };
204 
getChar()205 int PsSource::getChar()
206 {
207   int ch = iSource.getChar();
208   if (ch == '\n')
209     iSource.getChar(); // remove '%'
210   return ch;
211 }
212 
readLine()213 String PsSource::readLine()
214 {
215   String s;
216   int ch = iSource.getChar();
217   while (ch != EOF && ch != '\n') {
218     s += char(ch);
219     ch = iSource.getChar();
220   }
221   iEos = (ch == EOF);
222   return s;
223 }
224 
image(int index) const225 Buffer PsSource::image(int index) const
226 {
227   if (1 <= index && index <= int(iImages.size()))
228     return iImages[index - 1];
229   else
230     return Buffer();
231 }
232 
skipToXml()233 bool PsSource::skipToXml()
234 {
235   iDeflated = false;
236 
237   String s1 = readLine();
238   String s2 = readLine();
239 
240   if (s1.substr(0, 11) != "%!PS-Adobe-" ||
241       s2.substr(0, 11) != "%%Creator: ")
242     return false;
243 
244   if (s2.substr(11, 6) == "Ipelib") {
245     // the 'modern' file format of Ipe 6.0 preview 17 and later
246     do {
247       s1 = readLine();
248       if (s1.substr(0, 17) == "%%BeginIpeImage: ") {
249 	Lex lex(s1.substr(17));
250 	int num, len;
251 	lex >> num >> len;
252 	if (num != int(iImages.size() + 1))
253 	  return false;
254 	(void) readLine();  // skip 'image'
255 	Buffer buf(len);
256 	A85Source a85(iSource);
257 	char *p = buf.data();
258 	char *p1 = p + buf.size();
259 	while (p < p1) {
260 	  int ch = a85.getChar();
261 	  if (ch == EOF)
262 	    return false;
263 	  *p++ = char(ch);
264 	}
265 	iImages.push_back(buf);
266       }
267     } while (!iEos && s1.substr(0, 13) != "%%BeginIpeXml");
268 
269     iDeflated = (s1.substr(13, 14) == ": /FlateDecode");
270 
271   } else {
272     // the 'old' file format generated through pdftops
273     do {
274       s1 = readLine();
275     } while (!iEos && s1.substr(0, 10) != "%%EndSetup");
276   }
277   if (iEos)
278     return false;
279   (void) iSource.getChar(); // skip '%' before <ipe>
280   return true;
281 }
282 
283 // --------------------------------------------------------------------
284 
285 class StreamParserPs : public StreamParser {
286 public:
StreamParserPs(PsSource & loader,DataSource & source,std::FILE * out)287   explicit StreamParserPs(PsSource &loader, DataSource &source,
288 			  std::FILE *out)
289     : StreamParser(source, out), iLoader(loader) { /* nothing */ }
290   virtual Buffer image(int objNum);
291 private:
292   PsSource &iLoader;
293 };
294 
image(int objNum)295 Buffer StreamParserPs::image(int objNum)
296 {
297   return iLoader.image(objNum);
298 }
299 
300 // --------------------------------------------------------------------
301 
extractPs(DataSource & source,std::FILE * out)302 static bool extractPs(DataSource &source, std::FILE *out)
303 {
304   PsSource psSource(source);
305   if (!psSource.skipToXml()) {
306     fprintf(stderr, "Could not find XML stream.\n");
307     return false;
308   }
309 
310   if (psSource.deflated()) {
311     A85Source a85(psSource);
312     InflateSource source(a85);
313     StreamParserPs parser(psSource, source, out);
314     return parser.parse();
315   } else {
316     StreamParserPs parser(psSource, psSource, out);
317     return parser.parse();
318   }
319   return false;
320 }
321 
extractPdf(DataSource & source,std::FILE * out)322 static bool extractPdf(DataSource &source, std::FILE *out)
323 {
324   PdfFile loader;
325   if (!loader.parse(source)) {
326     fprintf(stderr, "Error parsing PDF file - probably not an Ipe file.\n");
327     return false;
328   }
329 
330   // try ancient format version first (early previews of Ipe 6.0)
331   const PdfObj *obj = loader.catalog()->get("Ipe", &loader);
332 
333   // otherwise try most recent format (>= 7.2.11)
334   if (!obj) {
335     obj = loader.catalog()->get("PieceInfo", &loader);
336     if (obj && obj->dict()) {
337       obj = obj->dict()->get("Ipe", &loader);
338       if (obj && obj->dict())
339 	obj = obj->dict()->get("Private", &loader);
340     }
341   }
342 
343   if (!obj)
344     obj = loader.object(1);
345 
346   if (!obj || !obj->dict()) {
347     fprintf(stderr, "Input file does not contain an Ipe XML stream.\n");
348     return false;
349   }
350 
351   const PdfObj *type = obj->dict()->get("Type");
352   if (!type || !type->name() || type->name()->value() != "Ipe") {
353     fprintf(stderr, "Input file does not contain an Ipe XML stream.\n");
354     return false;
355   }
356 
357   Buffer buffer = obj->dict()->stream();
358   BufferSource xml(buffer);
359 
360   if (obj->dict()->deflated()) {
361     InflateSource xml1(xml);
362     StreamParserPdf parser(loader, xml1, out);
363     return parser.parse();
364   } else {
365     StreamParserPdf parser(loader, xml, out);
366     return parser.parse();
367   }
368 }
369 
370 // --------------------------------------------------------------------
371 
//! Print the command line synopsis to stderr and exit with status 1.
static void usage()
{
  static const char text[] =
    "Usage: ipeextract ( <input.pdf> | <input.eps> ) [<output.xml>]\n"
    "Ipeextract extracts the XML stream from a PDF or Postscript file\n"
    "generated by any version of Ipe 6 or Ipe 7.\n";
  fputs(text, stderr);
  exit(1);
}
381 
main(int argc,char * argv[])382 int main(int argc, char *argv[])
383 {
384   Platform::initLib(IPELIB_VERSION);
385 
386   // ensure one or two arguments
387   if (argc != 2 && argc != 3)
388     usage();
389 
390   const char *src = argv[1];
391   String dst;
392 
393   if (argc == 3) {
394     dst = argv[2];
395   } else {
396     String s = src;
397     if (s.right(4) == ".pdf" || s.right(4) == ".eps")
398       dst = s.left(s.size() - 3) + "xml";
399     else
400       dst = s + ".xml";
401   }
402 
403   std::FILE *fd = Platform::fopen(src, "rb");
404   if (!fd) {
405     std::fprintf(stderr, "Could not open '%s'\n", src);
406     exit(1);
407   }
408   FileSource source(fd);
409   TFormat format = fileFormat(source);
410   if (format == EXml) {
411     fprintf(stderr, "Input file is already in XML format.\n");
412   } else if (format == EIpe5) {
413     fprintf(stderr, "Input file is in Ipe5 format.\n"
414 	    "Run 'ipe5toxml' to convert it to XML format.\n");
415   } else {
416     std::rewind(fd);
417     std::FILE *out = Platform::fopen(dst.z(), "wb");
418     if (!out) {
419       fprintf(stderr, "Could not open '%s' for writing.\n", dst.z());
420     } else {
421       bool res = (format == EPdf) ?
422 	extractPdf(source, out) : extractPs(source, out);
423       if (!res)
424 	fprintf(stderr, "Error during extraction of XML stream.\n");
425       std::fclose(out);
426     }
427   }
428   std::fclose(fd);
429   return 0;
430 }
431 
432 // --------------------------------------------------------------------
433