1 /* AbiSource
2 *
3 * Copyright (C) 2011 Volodymyr Rudyj <vladimir.rudoy@gmail.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA.
19 */
20
21 #include "ie_imp_EPUB.h"
22
IE_Imp_EPUB(PD_Document * pDocument)23 IE_Imp_EPUB::IE_Imp_EPUB(PD_Document* pDocument) :
24 IE_Imp(pDocument)
25 {
26
27 }
28
~IE_Imp_EPUB()29 IE_Imp_EPUB::~IE_Imp_EPUB()
30 {
31
32 }
33
pasteFromBuffer(PD_DocumentRange * pDocRange,const unsigned char * pData,UT_uint32 lenData,const char *)34 bool IE_Imp_EPUB::pasteFromBuffer(PD_DocumentRange* pDocRange,
35 const unsigned char* pData, UT_uint32 lenData, const char* /*szEncoding*/)
36 {
37 UT_return_val_if_fail(getDoc() == pDocRange->m_pDoc,false);
38 UT_return_val_if_fail(pDocRange->m_pos1 == pDocRange->m_pos2,false);
39
40 PD_Document * newDoc = new PD_Document();
41 newDoc->createRawDocument();
42 IE_Imp_EPUB * pEPUBImp = new IE_Imp_EPUB(newDoc);
43 //
44 // Turn pData into something that can be imported by the open documenb
45 // importer.
46 //
47 GsfInput * pInStream = gsf_input_memory_new((const guint8 *) pData,
48 (gsf_off_t) lenData, FALSE);
49 pEPUBImp->loadFile(newDoc, pInStream);
50
51 newDoc->finishRawCreation();
52
53 IE_Imp_PasteListener * pPasteListen = new IE_Imp_PasteListener(getDoc(),
54 pDocRange->m_pos1, newDoc);
55 newDoc->tellListener(static_cast<PL_Listener *> (pPasteListen));
56 delete pPasteListen;
57 delete pEPUBImp;
58 UNREFP( newDoc);
59 return true;
60 }
61
_loadFile(GsfInput * input)62 UT_Error IE_Imp_EPUB::_loadFile(GsfInput* input)
63 {
64 m_epub = gsf_infile_zip_new(input, NULL);
65
66 if (m_epub == NULL)
67 {
68 UT_DEBUGMSG(("Can`t create gsf input zip object\n"));
69 return UT_ERROR;
70 }
71
72 UT_DEBUGMSG(("Reading metadata\n"));
73 if (readMetadata() != UT_OK)
74 {
75 UT_DEBUGMSG(("Failed to read metadata\n"));
76 return UT_ERROR;
77 }
78
79 UT_DEBUGMSG(("Reading package information\n"));
80 if (readPackage() != UT_OK)
81 {
82 UT_DEBUGMSG(("Failed to read package information\n"));
83 return UT_ERROR;
84 }
85
86 UT_DEBUGMSG(("Uncompressing OPS data\n"));
87 if (uncompress() != UT_OK)
88 {
89 UT_DEBUGMSG(("Failed to uncompress data\n"));
90 return UT_ERROR;
91 }
92
93 UT_DEBUGMSG(("Reading OPS data\n"));
94 if (readStructure() != UT_OK)
95 {
96 UT_DEBUGMSG(("Failed to read OPS data\n"));
97 return UT_ERROR;
98 }
99
100 return UT_OK;
101
102 }
103
readMetadata()104 UT_Error IE_Imp_EPUB::readMetadata()
105 {
106 GsfInput* metaInf = gsf_infile_child_by_name(m_epub, "META-INF");
107
108 if (metaInf == NULL)
109 {
110 UT_DEBUGMSG(("Can`t open container META-INF dir\n"));
111 return UT_ERROR;
112 }
113
114 GsfInput* meta = gsf_infile_child_by_name(GSF_INFILE(metaInf),
115 "container.xml");
116
117 if (meta == NULL)
118 {
119 UT_DEBUGMSG(("Can`t open container metadata\n"));
120 return UT_ERROR;
121 }
122
123 size_t metaSize = gsf_input_size(meta);
124
125 if (metaSize == 0)
126 {
127 UT_DEBUGMSG(("Container metadata file is empty\n"));
128 return UT_ERROR;
129 }
130
131 gchar* metaXml = (gchar*) gsf_input_read(meta, metaSize, NULL);
132
133 std::string rootfilePath;
134 UT_XML metaParser;
135 ContainerListener containerListener;
136 metaParser.setListener(&containerListener);
137
138 if (metaParser.sniff(metaXml, metaSize, "container"))
139 {
140 UT_DEBUGMSG(("Parsing container.xml file\n"));
141 metaParser.parse(metaXml, metaSize);
142 }
143 else
144 {
145 UT_DEBUGMSG(("Incorrect container.xml file\n"));
146 return UT_ERROR;
147 }
148
149 m_rootfilePath = containerListener.getRootFilePath();
150
151 g_object_unref(G_OBJECT(meta));
152 g_object_unref(G_OBJECT(metaInf));
153
154 return UT_OK;
155 }
156
readPackage()157 UT_Error IE_Imp_EPUB::readPackage()
158 {
159 gchar **aname = g_strsplit(m_rootfilePath.c_str(), G_DIR_SEPARATOR_S, 0);
160 GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**) aname);
161
162 UT_DEBUGMSG(("Getting parent\n"));
163 GsfInfile* opfParent = gsf_input_container(opf);
164 m_opsDir = std::string(gsf_input_name(GSF_INPUT(opfParent)));
165
166 UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.c_str()));
167
168 if (opf == NULL)
169 {
170 UT_DEBUGMSG(("Can`t open .opf file\n"));
171 return UT_ERROR;
172 }
173
174 size_t opfSize = gsf_input_size(opf);
175 gchar* opfXml = (gchar*) gsf_input_read(opf, opfSize, NULL);
176
177 UT_XML opfParser;
178 OpfListener opfListener;
179 opfParser.setListener(&opfListener);
180 if (opfParser.sniff(opfXml, opfSize, "package"))
181 {
182 UT_DEBUGMSG(("Parsing opf file\n"));
183 opfParser.parse(opfXml, opfSize);
184 }
185 else
186 {
187 UT_DEBUGMSG(("Incorrect opf file found \n"));
188 return UT_ERROR;
189 }
190
191 g_strfreev(aname);
192 g_object_unref(G_OBJECT(opf));
193 //g_object_unref(G_OBJECT(opfParent));
194
195 m_spine = opfListener.getSpine();
196 m_manifestItems = opfListener.getManifestItems();
197
198 return UT_OK;
199 }
200
uncompress()201 UT_Error IE_Imp_EPUB::uncompress()
202 {
203 m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
204 m_tmpDir += G_DIR_SEPARATOR_S;
205 m_tmpDir += getDoc()->getDocUUIDString();
206
207 if (!UT_go_directory_create(m_tmpDir.c_str(), 0644, NULL))
208 {
209 UT_DEBUGMSG(("Can`t create temporary directory\n"));
210 return UT_ERROR;
211 }
212 GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub,
213 m_opsDir.c_str());
214 UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
215 if (opsDirInput == NULL)
216 {
217 UT_DEBUGMSG(("Failed to open OPS dir\n"));
218 return UT_ERROR;
219 }
220
221 for (std::map<std::string, std::string>::iterator i =
222 m_manifestItems.begin(); i != m_manifestItems.end(); i++)
223 {
224 gchar *itemFileName = UT_go_filename_from_uri(
225 (m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).c_str());
226 gchar** aname =
227 g_strsplit((*i).second.c_str(), G_DIR_SEPARATOR_S, 0);
228
229 GsfInput* itemInput = gsf_infile_child_by_aname(
230 GSF_INFILE(opsDirInput), (const char**) aname);
231 GsfOutput* itemOutput = createFileByPath(itemFileName);
232 gsf_input_seek(itemInput, 0, G_SEEK_SET);
233 gsf_input_copy(itemInput, itemOutput);
234 g_strfreev(aname);
235 g_free(itemFileName);
236 g_object_unref(G_OBJECT(itemInput));
237 gsf_output_close(itemOutput);
238 }
239
240 g_object_unref(G_OBJECT(opsDirInput));
241
242 return UT_OK;
243 }
244
readStructure()245 UT_Error IE_Imp_EPUB::readStructure()
246 {
247 getDoc()->createRawDocument();
248 getDoc()->finishRawCreation();
249
250 for (std::vector<std::string>::iterator i = m_spine.begin(); i
251 != m_spine.end(); i++)
252 {
253 std::map<std::string, std::string>::iterator iter =
254 m_manifestItems.find(*i);
255
256 if (iter == m_manifestItems.end())
257 {
258 UT_DEBUGMSG(("Manifest item with id %s not found\n", (*i).c_str()));
259 return UT_ERROR;
260 }
261 std::string itemPath = m_tmpDir + G_DIR_SEPARATOR_S + (iter->second);
262 PT_DocPosition posEnd = 0;
263 getDoc()->getBounds(true, posEnd);
264
265 if (i != m_spine.begin())
266 {
267 getDoc()->insertStrux(posEnd, PTX_Section, NULL, NULL);
268 getDoc()->insertStrux(posEnd+1, PTX_Block, NULL, NULL);
269 posEnd+=2;
270 }
271
272 GsfInput* itemInput = UT_go_file_open(itemPath.c_str(), NULL);
273 if (itemInput == NULL)
274 {
275 UT_DEBUGMSG(("Can`t open item for reading\n"));
276 return UT_ERROR;
277 }
278
279 PD_Document *currentDoc = new PD_Document();
280 currentDoc->createRawDocument();
281 const char *suffix = strchr(itemPath.c_str(), '.');
282 XAP_App::getApp()->getPrefs()->setIgnoreNextRecent();
283 if (currentDoc->importFile(itemPath.c_str(),
284 IE_Imp::fileTypeForSuffix(suffix), true, false, NULL) != UT_OK)
285 {
286 UT_DEBUGMSG(("Failed to import file %s\n", itemPath.c_str()));
287 return UT_ERROR;
288 }
289
290 currentDoc->finishRawCreation();
291 // const gchar * attributes[3] = {
292 // "listid",
293 // "0",
294 // 0
295 // };
296
297 // PT_DocPosition pos;
298 // currentDoc->getBounds(true, pos);
299 // currentDoc->insertStrux(pos, PTX_Block, attributes, NULL, NULL);
300
301 IE_Imp_PasteListener * pPasteListener = new IE_Imp_PasteListener(
302 getDoc(), posEnd, currentDoc);
303 currentDoc->tellListener(static_cast<PL_Listener *> (pPasteListener));
304
305
306 DELETEP(pPasteListener);
307 UNREFP(currentDoc);
308 g_object_unref(G_OBJECT(itemInput));
309 }
310
311 return UT_OK;
312 }
313
createFileByPath(const char * path)314 GsfOutput* IE_Imp_EPUB::createFileByPath(const char* path)
315 {
316 gchar** components = g_strsplit(path, G_DIR_SEPARATOR_S, 0);
317 std::string curPath = "";
318
319 int current = 0;
320 GsfOutput* output = NULL;
321 while (components[current] != NULL)
322 {
323 curPath += components[current];
324 current++;
325
326 char *uri = UT_go_filename_to_uri(curPath.c_str());
327 bool fileExists = UT_go_file_exists(uri);
328 if (!fileExists && (components[current] != NULL))
329 {
330 UT_go_directory_create(uri, 0644, NULL);
331 }
332 else
333 {
334 if (!fileExists)
335 {
336 output = UT_go_file_create(uri, NULL);
337 break;
338 }
339 }
340
341 g_free(uri);
342
343 if (components[current] != NULL)
344 {
345 curPath += G_DIR_SEPARATOR_S;
346 }
347 }
348
349 g_strfreev(components);
350 return output;
351 }
352
startElement(const gchar * name,const gchar ** atts)353 void ContainerListener::startElement(const gchar* name, const gchar** atts)
354 {
355 if (!UT_go_utf8_collate_casefold(name, "rootfile"))
356 {
357 m_rootFilePath = std::string(UT_getAttribute("full-path", atts));
358 UT_DEBUGMSG(("Found rootfile%s\n", m_rootFilePath.c_str()));
359 }
360 }
361
endElement(const gchar *)362 void ContainerListener::endElement(const gchar* /*name*/)
363 {
364 }
365
charData(const gchar *,int)366 void ContainerListener::charData(const gchar* /*buffer*/, int /*length*/)
367 {
368
369 }
370
getRootFilePath() const371 const std::string & ContainerListener::getRootFilePath() const
372 {
373 return m_rootFilePath;
374 }
375
376 /*
377
378 */
379
OpfListener()380 OpfListener::OpfListener() :
381 m_inManifest(false)
382 {
383
384 }
385
startElement(const gchar * name,const gchar ** atts)386 void OpfListener::startElement(const gchar* name, const gchar** atts)
387 {
388 if (!UT_go_utf8_collate_casefold(name, "manifest"))
389 {
390 m_inManifest = true;
391 }
392
393 if (!UT_go_utf8_collate_casefold(name, "spine"))
394 {
395 m_inSpine = true;
396 }
397
398 if (m_inManifest)
399 {
400 if (!UT_go_utf8_collate_casefold(name, "item"))
401 {
402 m_manifestItems.insert(
403 make_pair(std::string(UT_getAttribute("id", atts)),
404 std::string(UT_getAttribute("href", atts))));
405 UT_DEBUGMSG(("Found manifest item: %s\n", UT_getAttribute("href", atts)));
406 }
407 }
408
409 if (m_inSpine)
410 {
411 if (!UT_go_utf8_collate_casefold(name, "itemref"))
412 {
413 // We can ignore "linear" attribute as it said in specification
414 m_spine.push_back(std::string(UT_getAttribute("idref", atts)));
415 UT_DEBUGMSG(("Found spine itemref: %s\n", UT_getAttribute("idref", atts)));
416 }
417 }
418
419 }
420
endElement(const gchar *)421 void OpfListener::endElement(const gchar* /*name*/)
422 {
423
424 }
425
charData(const gchar *,int)426 void OpfListener::charData(const gchar* /*buffer*/, int /*length*/)
427 {
428
429 }
430
431 /*
432
433 */
434
startElement(const gchar *,const gchar **)435 void NavigationListener::startElement(const gchar* /*name*/, const gchar** /*atts*/)
436 {
437
438 }
439
endElement(const gchar *)440 void NavigationListener::endElement(const gchar* /*name*/)
441 {
442
443 }
444
charData(const gchar *,int)445 void NavigationListener::charData(const gchar* /*buffer*/, int /*length*/)
446 {
447
448 }
449