1 /* AbiSource
2  *
3  * Copyright (C) 2011 Volodymyr Rudyj <vladimir.rudoy@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18  * 02110-1301 USA.
19  */
20 
21 #include "ie_imp_EPUB.h"
22 
IE_Imp_EPUB(PD_Document * pDocument)23 IE_Imp_EPUB::IE_Imp_EPUB(PD_Document* pDocument) :
24     IE_Imp(pDocument)
25 {
26 
27 }
28 
~IE_Imp_EPUB()29 IE_Imp_EPUB::~IE_Imp_EPUB()
30 {
31 
32 }
33 
pasteFromBuffer(PD_DocumentRange * pDocRange,const unsigned char * pData,UT_uint32 lenData,const char *)34 bool IE_Imp_EPUB::pasteFromBuffer(PD_DocumentRange* pDocRange,
35 				  const unsigned char* pData, UT_uint32 lenData, const char* /*szEncoding*/)
36 {
37     UT_return_val_if_fail(getDoc() == pDocRange->m_pDoc,false);
38     UT_return_val_if_fail(pDocRange->m_pos1 == pDocRange->m_pos2,false);
39 
40     PD_Document * newDoc = new PD_Document();
41     newDoc->createRawDocument();
42     IE_Imp_EPUB * pEPUBImp = new IE_Imp_EPUB(newDoc);
43     //
44     // Turn pData into something that can be imported by the open documenb
45     // importer.
46     //
47     GsfInput * pInStream = gsf_input_memory_new((const guint8 *) pData,
48             (gsf_off_t) lenData, FALSE);
49     pEPUBImp->loadFile(newDoc, pInStream);
50 
51     newDoc->finishRawCreation();
52 
53     IE_Imp_PasteListener * pPasteListen = new IE_Imp_PasteListener(getDoc(),
54             pDocRange->m_pos1, newDoc);
55     newDoc->tellListener(static_cast<PL_Listener *> (pPasteListen));
56     delete pPasteListen;
57     delete pEPUBImp;
58     UNREFP( newDoc);
59     return true;
60 }
61 
_loadFile(GsfInput * input)62 UT_Error IE_Imp_EPUB::_loadFile(GsfInput* input)
63 {
64     m_epub = gsf_infile_zip_new(input, NULL);
65 
66     if (m_epub == NULL)
67     {
68         UT_DEBUGMSG(("Can`t create gsf input zip object\n"));
69         return UT_ERROR;
70     }
71 
72     UT_DEBUGMSG(("Reading metadata\n"));
73     if (readMetadata() != UT_OK)
74     {
75         UT_DEBUGMSG(("Failed to read metadata\n"));
76         return UT_ERROR;
77     }
78 
79     UT_DEBUGMSG(("Reading package information\n"));
80     if (readPackage() != UT_OK)
81     {
82         UT_DEBUGMSG(("Failed to read package information\n"));
83         return UT_ERROR;
84     }
85 
86     UT_DEBUGMSG(("Uncompressing OPS data\n"));
87     if (uncompress() != UT_OK)
88     {
89         UT_DEBUGMSG(("Failed to uncompress data\n"));
90         return UT_ERROR;
91     }
92 
93     UT_DEBUGMSG(("Reading OPS data\n"));
94     if (readStructure() != UT_OK)
95     {
96         UT_DEBUGMSG(("Failed to read OPS data\n"));
97         return UT_ERROR;
98     }
99 
100     return UT_OK;
101 
102 }
103 
readMetadata()104 UT_Error IE_Imp_EPUB::readMetadata()
105 {
106     GsfInput* metaInf = gsf_infile_child_by_name(m_epub, "META-INF");
107 
108     if (metaInf == NULL)
109     {
110         UT_DEBUGMSG(("Can`t open container META-INF dir\n"));
111         return UT_ERROR;
112     }
113 
114     GsfInput* meta = gsf_infile_child_by_name(GSF_INFILE(metaInf),
115             "container.xml");
116 
117     if (meta == NULL)
118     {
119         UT_DEBUGMSG(("Can`t open container metadata\n"));
120         return UT_ERROR;
121     }
122 
123     size_t metaSize = gsf_input_size(meta);
124 
125     if (metaSize == 0)
126     {
127         UT_DEBUGMSG(("Container metadata file is empty\n"));
128         return UT_ERROR;
129     }
130 
131     gchar* metaXml = (gchar*) gsf_input_read(meta, metaSize, NULL);
132 
133     std::string rootfilePath;
134     UT_XML metaParser;
135     ContainerListener containerListener;
136     metaParser.setListener(&containerListener);
137 
138     if (metaParser.sniff(metaXml, metaSize, "container"))
139     {
140         UT_DEBUGMSG(("Parsing container.xml file\n"));
141         metaParser.parse(metaXml, metaSize);
142     }
143     else
144     {
145         UT_DEBUGMSG(("Incorrect container.xml file\n"));
146         return UT_ERROR;
147     }
148 
149     m_rootfilePath = containerListener.getRootFilePath();
150 
151     g_object_unref(G_OBJECT(meta));
152     g_object_unref(G_OBJECT(metaInf));
153 
154     return UT_OK;
155 }
156 
readPackage()157 UT_Error IE_Imp_EPUB::readPackage()
158 {
159     gchar **aname = g_strsplit(m_rootfilePath.c_str(), G_DIR_SEPARATOR_S, 0);
160     GsfInput* opf = gsf_infile_child_by_aname(m_epub, (const char**) aname);
161 
162     UT_DEBUGMSG(("Getting parent\n"));
163     GsfInfile* opfParent = gsf_input_container(opf);
164     m_opsDir = std::string(gsf_input_name(GSF_INPUT(opfParent)));
165 
166     UT_DEBUGMSG(("OPS dir: %s\n", m_opsDir.c_str()));
167 
168     if (opf == NULL)
169     {
170         UT_DEBUGMSG(("Can`t open .opf file\n"));
171         return UT_ERROR;
172     }
173 
174     size_t opfSize = gsf_input_size(opf);
175     gchar* opfXml = (gchar*) gsf_input_read(opf, opfSize, NULL);
176 
177     UT_XML opfParser;
178     OpfListener opfListener;
179     opfParser.setListener(&opfListener);
180     if (opfParser.sniff(opfXml, opfSize, "package"))
181     {
182         UT_DEBUGMSG(("Parsing opf file\n"));
183         opfParser.parse(opfXml, opfSize);
184     }
185     else
186     {
187         UT_DEBUGMSG(("Incorrect opf file found \n"));
188         return UT_ERROR;
189     }
190 
191     g_strfreev(aname);
192     g_object_unref(G_OBJECT(opf));
193     //g_object_unref(G_OBJECT(opfParent));
194 
195     m_spine = opfListener.getSpine();
196     m_manifestItems = opfListener.getManifestItems();
197 
198     return UT_OK;
199 }
200 
uncompress()201 UT_Error IE_Imp_EPUB::uncompress()
202 {
203     m_tmpDir = UT_go_filename_to_uri(g_get_tmp_dir());
204     m_tmpDir += G_DIR_SEPARATOR_S;
205     m_tmpDir += getDoc()->getDocUUIDString();
206 
207     if (!UT_go_directory_create(m_tmpDir.c_str(), 0644, NULL))
208     {
209         UT_DEBUGMSG(("Can`t create temporary directory\n"));
210         return UT_ERROR;
211     }
212     GsfInput *opsDirInput = gsf_infile_child_by_name(m_epub,
213             m_opsDir.c_str());
214     UT_DEBUGMSG(("Child count : %d", gsf_infile_num_children(m_epub)));
215     if (opsDirInput == NULL)
216     {
217         UT_DEBUGMSG(("Failed to open OPS dir\n"));
218         return UT_ERROR;
219     }
220 
221     for (std::map<std::string, std::string>::iterator i =
222             m_manifestItems.begin(); i != m_manifestItems.end(); i++)
223     {
224         gchar *itemFileName = UT_go_filename_from_uri(
225                 (m_tmpDir + G_DIR_SEPARATOR_S + (*i).second).c_str());
226         gchar** aname =
227                 g_strsplit((*i).second.c_str(), G_DIR_SEPARATOR_S, 0);
228 
229         GsfInput* itemInput = gsf_infile_child_by_aname(
230                 GSF_INFILE(opsDirInput), (const char**) aname);
231         GsfOutput* itemOutput = createFileByPath(itemFileName);
232         gsf_input_seek(itemInput, 0, G_SEEK_SET);
233         gsf_input_copy(itemInput, itemOutput);
234         g_strfreev(aname);
235         g_free(itemFileName);
236         g_object_unref(G_OBJECT(itemInput));
237         gsf_output_close(itemOutput);
238     }
239 
240     g_object_unref(G_OBJECT(opsDirInput));
241 
242     return UT_OK;
243 }
244 
readStructure()245 UT_Error IE_Imp_EPUB::readStructure()
246 {
247     getDoc()->createRawDocument();
248     getDoc()->finishRawCreation();
249 
250     for (std::vector<std::string>::iterator i = m_spine.begin(); i
251             != m_spine.end(); i++)
252     {
253         std::map<std::string, std::string>::iterator iter =
254                 m_manifestItems.find(*i);
255 
256         if (iter == m_manifestItems.end())
257         {
258             UT_DEBUGMSG(("Manifest item with id %s not found\n", (*i).c_str()));
259             return UT_ERROR;
260         }
261 	std::string itemPath = m_tmpDir + G_DIR_SEPARATOR_S + (iter->second);
262         PT_DocPosition posEnd = 0;
263         getDoc()->getBounds(true, posEnd);
264 
265         if (i != m_spine.begin())
266         {
267             getDoc()->insertStrux(posEnd, PTX_Section, NULL, NULL);
268             getDoc()->insertStrux(posEnd+1, PTX_Block, NULL, NULL);
269             posEnd+=2;
270         }
271 
272         GsfInput* itemInput = UT_go_file_open(itemPath.c_str(), NULL);
273         if (itemInput == NULL)
274         {
275             UT_DEBUGMSG(("Can`t open item for reading\n"));
276             return UT_ERROR;
277         }
278 
279         PD_Document *currentDoc = new PD_Document();
280         currentDoc->createRawDocument();
281         const char *suffix = strchr(itemPath.c_str(), '.');
282         XAP_App::getApp()->getPrefs()->setIgnoreNextRecent();
283         if (currentDoc->importFile(itemPath.c_str(),
284                 IE_Imp::fileTypeForSuffix(suffix), true, false, NULL) != UT_OK)
285         {
286             UT_DEBUGMSG(("Failed to import file %s\n", itemPath.c_str()));
287             return UT_ERROR;
288         }
289 
290         currentDoc->finishRawCreation();
291         // const gchar * attributes[3] = {
292         //     "listid",
293         //     "0",
294         //     0
295         // };
296 
297         // PT_DocPosition pos;
298         // currentDoc->getBounds(true, pos);
299         // currentDoc->insertStrux(pos, PTX_Block, attributes, NULL, NULL);
300 
301         IE_Imp_PasteListener * pPasteListener = new IE_Imp_PasteListener(
302                 getDoc(), posEnd, currentDoc);
303         currentDoc->tellListener(static_cast<PL_Listener *> (pPasteListener));
304 
305 
306         DELETEP(pPasteListener);
307         UNREFP(currentDoc);
308         g_object_unref(G_OBJECT(itemInput));
309     }
310 
311     return UT_OK;
312 }
313 
createFileByPath(const char * path)314 GsfOutput* IE_Imp_EPUB::createFileByPath(const char* path)
315 {
316     gchar** components = g_strsplit(path, G_DIR_SEPARATOR_S, 0);
317     std::string curPath = "";
318 
319     int current = 0;
320     GsfOutput* output = NULL;
321     while (components[current] != NULL)
322     {
323         curPath += components[current];
324         current++;
325 
326         char *uri = UT_go_filename_to_uri(curPath.c_str());
327         bool fileExists = UT_go_file_exists(uri);
328         if (!fileExists && (components[current] != NULL))
329         {
330             UT_go_directory_create(uri, 0644, NULL);
331         }
332         else
333         {
334             if (!fileExists)
335             {
336                 output = UT_go_file_create(uri, NULL);
337                 break;
338             }
339         }
340 
341         g_free(uri);
342 
343         if (components[current] != NULL)
344         {
345             curPath += G_DIR_SEPARATOR_S;
346         }
347     }
348 
349     g_strfreev(components);
350     return output;
351 }
352 
startElement(const gchar * name,const gchar ** atts)353 void ContainerListener::startElement(const gchar* name, const gchar** atts)
354 {
355     if (!UT_go_utf8_collate_casefold(name, "rootfile"))
356     {
357         m_rootFilePath = std::string(UT_getAttribute("full-path", atts));
358         UT_DEBUGMSG(("Found rootfile%s\n", m_rootFilePath.c_str()));
359     }
360 }
361 
endElement(const gchar *)362 void ContainerListener::endElement(const gchar* /*name*/)
363 {
364 }
365 
charData(const gchar *,int)366 void ContainerListener::charData(const gchar* /*buffer*/, int /*length*/)
367 {
368 
369 }
370 
getRootFilePath() const371 const std::string & ContainerListener::getRootFilePath() const
372 {
373     return m_rootFilePath;
374 }
375 
376 /*
377 
378  */
379 
OpfListener()380 OpfListener::OpfListener() :
381     m_inManifest(false)
382 {
383 
384 }
385 
startElement(const gchar * name,const gchar ** atts)386 void OpfListener::startElement(const gchar* name, const gchar** atts)
387 {
388     if (!UT_go_utf8_collate_casefold(name, "manifest"))
389     {
390         m_inManifest = true;
391     }
392 
393     if (!UT_go_utf8_collate_casefold(name, "spine"))
394     {
395         m_inSpine = true;
396     }
397 
398     if (m_inManifest)
399     {
400         if (!UT_go_utf8_collate_casefold(name, "item"))
401         {
402             m_manifestItems.insert(
403 				   make_pair(std::string(UT_getAttribute("id", atts)),
404 					     std::string(UT_getAttribute("href", atts))));
405             UT_DEBUGMSG(("Found manifest item: %s\n", UT_getAttribute("href", atts)));
406         }
407     }
408 
409     if (m_inSpine)
410     {
411         if (!UT_go_utf8_collate_casefold(name, "itemref"))
412         {
413             // We can ignore "linear" attribute as it said in specification
414 	    m_spine.push_back(std::string(UT_getAttribute("idref", atts)));
415             UT_DEBUGMSG(("Found spine itemref: %s\n", UT_getAttribute("idref", atts)));
416         }
417     }
418 
419 }
420 
endElement(const gchar *)421 void OpfListener::endElement(const gchar* /*name*/)
422 {
423 
424 }
425 
charData(const gchar *,int)426 void OpfListener::charData(const gchar* /*buffer*/, int /*length*/)
427 {
428 
429 }
430 
431 /*
432 
433  */
434 
startElement(const gchar *,const gchar **)435 void NavigationListener::startElement(const gchar* /*name*/, const gchar** /*atts*/)
436 {
437 
438 }
439 
endElement(const gchar *)440 void NavigationListener::endElement(const gchar* /*name*/)
441 {
442 
443 }
444 
charData(const gchar *,int)445 void NavigationListener::charData(const gchar* /*buffer*/, int /*length*/)
446 {
447 
448 }
449