1 /* this file is part of xreader, a mate document viewer
2  *
3  *  Copyright (C) 2014 Avishkar Gupta
4  *
5  *  Author:
6  *   Avishkar Gupta <avishkar.gupta.delhi@gmail.com>
7  *
8  * Xreader is free software; you can redistribute it and/or modify it
9  * under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * Xreader is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  */
22 
23 #include "epub-document.h"
24 #include "ev-file-helpers.h"
25 #include "unzip.h"
26 #include "ev-document-thumbnails.h"
27 #include "ev-document-find.h"
28 #include "ev-backends-manager.h"
29 #include "ev-document-links.h"
30 #include "ev-document-misc.h"
31 #include <libxml/parser.h>
32 #include <libxml/xmlmemory.h>
33 #include <libxml/HTMLparser.h>
34 #include <config.h>
35 
36 #include <glib/gi18n.h>
37 #include <glib/gstdio.h>
38 
39 #include <gtk/gtk.h>
40 
41 /*For strcasestr(),strstr()*/
42 #include <string.h>
43 
/* Selects which piece of a node xml_get_data_from_node() hands back. */
typedef enum _xmlParseReturnType {
    XML_ATTRIBUTE,  /* the value of a named attribute */
    XML_KEYWORD     /* the text held by the node's children */
} xmlParseReturnType;
49 
50 typedef struct _contentListNode {
51     gchar* key ;
52     gchar* value ;
53 	gint index ;
54 }contentListNode;
55 
56 typedef struct _linknode {
57     gchar *pagelink;
58 	GList *children;
59     gchar *linktext;
60 	guint page;
61 }linknode;
62 
63 typedef struct _EpubDocumentClass EpubDocumentClass;
64 
65 struct _EpubDocumentClass
66 {
67     EvDocumentClass parent_class;
68 };
69 
70 struct _EpubDocument
71 {
72     EvDocument parent_instance;
73 	/*Stores the path to the source archive*/
74     gchar* archivename ;
75 	/*Stores the path of the directory where we unzipped the epub*/
76     gchar* tmp_archive_dir ;
77 	/*Stores the contentlist in a sorted manner*/
78     GList* contentList ;
79     /* A variable to hold our epubDocument for unzipping*/
80     unzFile epubDocument ;
81 	/*The (sub)directory that actually houses the document*/
82 	gchar* documentdir;
83 	/*Stores the table of contents*/
84 	GList *index;
85 	/*Document title, for the sidebar links*/
86 	gchar *docTitle;
87 };
88 
89 static void       epub_document_document_thumbnails_iface_init (EvDocumentThumbnailsInterface *iface);
90 static void       epub_document_document_find_iface_init       (EvDocumentFindInterface       *iface);
91 static void       epub_document_document_links_iface_init      (EvDocumentLinksInterface      *iface);
92 
93 EV_BACKEND_REGISTER_WITH_CODE (EpubDocument, epub_document,
94 	{
95 		EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_THUMBNAILS,
96 						epub_document_document_thumbnails_iface_init);
97 		 EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_FIND,
98 								 epub_document_document_find_iface_init);
99         EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_LINKS,
100                                  epub_document_document_links_iface_init);
101 
102 	} );
103 
104 static void
epub_document_thumbnails_get_dimensions(EvDocumentThumbnails * document,EvRenderContext * rc,gint * width,gint * height)105 epub_document_thumbnails_get_dimensions (EvDocumentThumbnails *document,
106                                          EvRenderContext      *rc,
107                                          gint                 *width,
108                                          gint                 *height)
109 {
110 	gdouble page_width, page_height;
111 
112 	page_width = 800;
113 	page_height = 1080;
114 
115 	*width = MAX ((gint)(page_width * rc->scale + 0.5), 1);
116 	*height = MAX ((gint)(page_height * rc->scale + 0.5), 1);
117 }
118 
119 static GdkPixbuf *
epub_document_thumbnails_get_thumbnail(EvDocumentThumbnails * document,EvRenderContext * rc,gboolean border)120 epub_document_thumbnails_get_thumbnail (EvDocumentThumbnails *document,
121                                         EvRenderContext      *rc,
122                                         gboolean              border)
123 {
124 	cairo_surface_t *webpage;
125 	GdkPixbuf *thumbnailpix = NULL ;
126 	gint width,height;
127 	epub_document_thumbnails_get_dimensions (document, rc, &width, &height);
128 	webpage = ev_document_misc_surface_rotate_and_scale (rc->page->backend_page,
129 	                                                     width, height, 0);
130 	thumbnailpix = ev_document_misc_pixbuf_from_surface (webpage);
131 	return thumbnailpix;
132 }
133 
134 static gboolean
in_tag(const char * found)135 in_tag(const char* found)
136 {
137     const char* bracket = found ;
138 
139     /* Since the dump started with the body tag, the '<' will be the first
140      * character in the haystack.
141      */
142     while (*bracket != '<') {
143         bracket--;
144         if (*bracket == '>') {
145             /*We encounted a close brace before an open*/
146             return FALSE ;
147         }
148     }
149 
150     return TRUE;
151 }
152 
153 static int
get_substr_count(const char * haystack,const char * needle,gboolean case_sensitive)154 get_substr_count(const char * haystack,const char *needle,gboolean case_sensitive)
155 {
156     const char* tmp = haystack ;
157     char* (*string_compare_function)(const char*,const char*);
158     int count=0;
159     if (case_sensitive) {
160         string_compare_function = strstr ;
161     }
162     else {
163         string_compare_function = strcasestr;
164     }
165 
166     while ((tmp=string_compare_function(tmp,needle))) {
167         if (!in_tag(tmp)) {
168             count++;
169         }
170         tmp = tmp + strlen(needle);
171     }
172 
173     return count;
174 }
175 
176 static guint
epub_document_check_hits(EvDocumentFind * document_find,EvPage * page,const gchar * text,gboolean case_sensitive)177 epub_document_check_hits(EvDocumentFind *document_find,
178                          EvPage         *page,
179                          const gchar    *text,
180                          gboolean        case_sensitive)
181 {
182 	gchar *filepath = g_filename_from_uri((gchar*)page->backend_page,NULL,NULL);
183 	htmlDocPtr htmldoc =  xmlParseFile(filepath);
184 
185 	if (htmldoc == NULL) {
186 		g_free (filepath);
187 		return 0;
188 	}
189 
190 	htmlNodePtr htmltag = xmlDocGetRootElement(htmldoc);
191 	if(htmltag == NULL) {
192 		xmlFreeDoc(htmldoc);
193 		g_free (filepath);
194 		return 0;
195 	}
196 
197 	int count=0;
198 	htmlNodePtr bodytag = htmltag->xmlChildrenNode;
199 
200 	while ( xmlStrcmp(bodytag->name,(xmlChar*)"body") ) {
201 		bodytag = bodytag->next;
202 	}
203 
204 	xmlBufferPtr bodybuffer = xmlBufferCreate();
205 	xmlNodeDump(bodybuffer,htmldoc,bodytag,0,1);
206 
207 	count = get_substr_count((char*)bodybuffer->content,text,case_sensitive);
208 
209 	xmlBufferFree(bodybuffer);
210 	xmlFreeDoc(htmldoc);
211 	g_free (filepath);
212 
213 	return count;
214 }
215 
216 static gboolean
epub_document_links_has_document_links(EvDocumentLinks * document_links)217 epub_document_links_has_document_links(EvDocumentLinks *document_links)
218 {
219     EpubDocument *epub_document = EPUB_DOCUMENT(document_links);
220 
221     g_return_val_if_fail(EPUB_IS_DOCUMENT(epub_document), FALSE);
222 
223     if (!epub_document->index)
224         return FALSE;
225 
226     return TRUE;
227 }
228 
229 
230 typedef struct _LinksCBStruct {
231 	GtkTreeModel *model;
232 	GtkTreeIter  *parent;
233 }LinksCBStruct;
234 
235 static void
epub_document_make_tree_entry(linknode * ListData,LinksCBStruct * UserData)236 epub_document_make_tree_entry(linknode* ListData,LinksCBStruct* UserData)
237 {
238 	GtkTreeIter tree_iter;
239 	EvLink *link = NULL;
240 	gboolean expand;
241 	char *title_markup;
242 
243 	if (ListData->children) {
244 		expand=TRUE;
245 	}
246 	else {
247 		expand=FALSE;
248 	}
249 
250 	EvLinkDest *ev_dest = NULL;
251 	EvLinkAction *ev_action;
252 
253 	/* We shall use a EV_LINK_DEST_TYPE_PAGE for page links,
254 	 * and a EV_LINK_DEST_TYPE_HLINK(custom) for refs on a page of type url#label
255 	 * because we need both dest and page label for this.
256 	 */
257 
258 	if (g_strrstr(ListData->pagelink,"#") == NULL) {
259 		ev_dest = ev_link_dest_new_page(ListData->page);
260 	}
261 	else {
262 		ev_dest = ev_link_dest_new_hlink((gchar*)ListData->pagelink,ListData->page);
263 	}
264 
265 	ev_action = ev_link_action_new_dest (ev_dest);
266 
267 	link = ev_link_new((gchar*)ListData->linktext,ev_action);
268 
269 	gtk_tree_store_append (GTK_TREE_STORE (UserData->model), &tree_iter,(UserData->parent));
270 	title_markup = g_strdup((gchar*)ListData->linktext);
271 
272 	gtk_tree_store_set (GTK_TREE_STORE (UserData->model), &tree_iter,
273 			    EV_DOCUMENT_LINKS_COLUMN_MARKUP, title_markup,
274 			    EV_DOCUMENT_LINKS_COLUMN_LINK, link,
275 			    EV_DOCUMENT_LINKS_COLUMN_EXPAND, expand,
276 			    -1);
277 
278 	if (ListData->children) {
279 		LinksCBStruct cbstruct;
280 		cbstruct.parent = &tree_iter;
281 		cbstruct.model = UserData->model;
282 		g_list_foreach (ListData->children,(GFunc)epub_document_make_tree_entry,&cbstruct);
283 	}
284 
285 	g_free (title_markup);
286 	g_object_unref (link);
287 }
288 
289 static GtkTreeModel *
epub_document_links_get_links_model(EvDocumentLinks * document_links)290 epub_document_links_get_links_model(EvDocumentLinks *document_links)
291 {
292     GtkTreeModel *model = NULL;
293 
294 	g_return_val_if_fail (EPUB_IS_DOCUMENT (document_links), NULL);
295 
296     EpubDocument *epub_document = EPUB_DOCUMENT(document_links);
297 
298     model = (GtkTreeModel*) gtk_tree_store_new (EV_DOCUMENT_LINKS_COLUMN_NUM_COLUMNS,
299                                                 G_TYPE_STRING,
300                                                 G_TYPE_OBJECT,
301                                                 G_TYPE_BOOLEAN,
302                                                 G_TYPE_STRING);
303 
304 	LinksCBStruct linkStruct;
305 	linkStruct.model = model;
306 	EvLink *link = ev_link_new(epub_document->docTitle,
307 	                           ev_link_action_new_dest(ev_link_dest_new_page(0)));
308 	GtkTreeIter parent;
309 
310 	linkStruct.parent = &parent;
311 
312 	gtk_tree_store_append (GTK_TREE_STORE (model), &parent,NULL);
313 
314 	gtk_tree_store_set (GTK_TREE_STORE (model), &parent,
315 			    EV_DOCUMENT_LINKS_COLUMN_MARKUP, epub_document->docTitle,
316 			    EV_DOCUMENT_LINKS_COLUMN_LINK, link,
317 			    EV_DOCUMENT_LINKS_COLUMN_EXPAND, TRUE,
318 			    -1);
319 
320 	g_object_unref(link);
321 
322 	if (epub_document->index) {
323 		g_list_foreach (epub_document->index,(GFunc)epub_document_make_tree_entry,&linkStruct);
324 	}
325 
326     return model;
327 }
328 
329 static EvMappingList *
epub_document_links_get_links(EvDocumentLinks * document_links,EvPage * page)330 epub_document_links_get_links (EvDocumentLinks *document_links,
331 			       EvPage	       *page)
332 {
333 	/* TODO
334 	 * ev_mapping_list_new()
335 	 */
336 	return NULL;
337 }
338 
339 static void
epub_document_document_thumbnails_iface_init(EvDocumentThumbnailsInterface * iface)340 epub_document_document_thumbnails_iface_init (EvDocumentThumbnailsInterface *iface)
341 {
342 	iface->get_thumbnail = epub_document_thumbnails_get_thumbnail;
343 	iface->get_dimensions = epub_document_thumbnails_get_dimensions;
344 }
345 
346 static void
epub_document_document_find_iface_init(EvDocumentFindInterface * iface)347 epub_document_document_find_iface_init (EvDocumentFindInterface *iface)
348 {
349 	iface->check_for_hits = epub_document_check_hits;
350 }
351 
352 static void
epub_document_document_links_iface_init(EvDocumentLinksInterface * iface)353 epub_document_document_links_iface_init(EvDocumentLinksInterface *iface)
354 {
355     iface->has_document_links = epub_document_links_has_document_links;
356     iface->get_links_model = epub_document_links_get_links_model;
357     iface->get_links = epub_document_links_get_links;
358 }
359 
360 static gboolean
epub_document_save(EvDocument * document,const char * uri,GError ** error)361 epub_document_save (EvDocument *document,
362                     const char *uri,
363                     GError    **error)
364 {
365     EpubDocument *epub_document = EPUB_DOCUMENT (document);
366 
367     gchar *source_uri = g_filename_to_uri (epub_document->archivename, NULL, error);
368     if (source_uri == NULL)
369         return FALSE;
370 
371     return ev_xfer_uri_simple (source_uri, uri, error);
372 }
373 
374 static int
epub_document_get_n_pages(EvDocument * document)375 epub_document_get_n_pages (EvDocument *document)
376 {
377     EpubDocument *epub_document = EPUB_DOCUMENT (document);
378 
379     if (epub_document-> contentList == NULL)
380         return 0;
381 
382     return g_list_length(epub_document->contentList);
383 }
384 
385 /**
386  * epub_remove_temporary_dir : Removes a directory recursively.
387  * This function is same as comics_remove_temporary_dir
388  * Returns:
389  *   	0 if it was successfully deleted,
390  * 	-1 if an error occurred
391  */
392 static int
epub_remove_temporary_dir(gchar * path_name)393 epub_remove_temporary_dir (gchar *path_name)
394 {
395 	GDir  *content_dir;
396 	const gchar *filename;
397 	gchar *filename_with_path;
398 
399 	if (g_file_test (path_name, G_FILE_TEST_IS_DIR)) {
400 		content_dir = g_dir_open  (path_name, 0, NULL);
401 		filename  = g_dir_read_name (content_dir);
402 		while (filename) {
403 			filename_with_path =
404 				g_build_filename (path_name,
405 						  filename, NULL);
406 			epub_remove_temporary_dir (filename_with_path);
407 			g_free (filename_with_path);
408 			filename = g_dir_read_name (content_dir);
409 		}
410 		g_dir_close (content_dir);
411 	}
412 	/* Note from g_remove() documentation: on Windows, it is in general not
413 	 * possible to remove a file that is open to some process, or mapped
414 	 * into memory.*/
415 	return (g_remove (path_name));
416 }
417 
418 
419 static gboolean
420 check_mime_type             (const gchar* uri,
421                              GError** error);
422 
423 static gboolean
424 open_xml_document           (const gchar* filename);
425 
426 static gboolean
427 set_xml_root_node           (xmlChar* rootname);
428 
429 static xmlNodePtr
430 xml_get_pointer_to_node     (xmlChar* parserfor,
431                              xmlChar* attributename,
432                              xmlChar* attributevalue);
433 static void
434 xml_parse_children_of_node  (xmlNodePtr parent,
435                              xmlChar* parserfor,
436                              xmlChar* attributename,
437                              xmlChar* attributevalue);
438 
439 static gboolean
440 xml_check_attribute_value   (xmlNode* node,
441                              xmlChar * attributename,
442                              xmlChar* attributevalue);
443 
444 static xmlChar*
445 xml_get_data_from_node      (xmlNodePtr node,
446                              xmlParseReturnType rettype,
447                              xmlChar* attributename);
448 
449 static void
450 xml_free_doc();
451 
452 static void
453 free_tree_nodes             (gpointer data);
454 
455 /*Global variables for XML parsing*/
456 static xmlDocPtr    xmldocument ;
457 static xmlNodePtr   xmlroot ;
458 static xmlNodePtr   xmlretval ;
459 
460 /*
461 **Functions to parse the xml files.
462 **Open a XML document for reading
463 */
464 static gboolean
open_xml_document(const gchar * filename)465 open_xml_document ( const gchar* filename )
466 {
467 	xmldocument = xmlParseFile(filename);
468 
469 	if ( xmldocument == NULL )
470 	{
471 		return FALSE ;
472 	}
473 	else
474 	{
475 		return TRUE ;
476 	}
477 }
478 
479 /**
480  *Check if the root value is same as rootname .
481  *if supplied rootvalue = NULL ,just set root to rootnode .
482 **/
483 static gboolean
set_xml_root_node(xmlChar * rootname)484 set_xml_root_node(xmlChar* rootname)
485 {
486 	xmlroot = xmlDocGetRootElement(xmldocument);
487 
488 	if (xmlroot == NULL) {
489 
490 		xmlFreeDoc(xmldocument);
491 		return FALSE;
492 	}
493 
494     if ( rootname == NULL )
495     {
496         return TRUE ;
497     }
498 
499     if ( !xmlStrcmp(xmlroot->name,rootname))
500     {
501         return TRUE ;
502     }
503     else
504     {
505 	   return FALSE;
506     }
507 }
508 
509 static xmlNodePtr
xml_get_pointer_to_node(xmlChar * parserfor,xmlChar * attributename,xmlChar * attributevalue)510 xml_get_pointer_to_node(xmlChar* parserfor,
511                         xmlChar*  attributename,
512                         xmlChar* attributevalue )
513 {
514     xmlNodePtr topchild;
515 
516     xmlretval = NULL ;
517 
518     if ( !xmlStrcmp( xmlroot->name, parserfor) )
519     {
520         return xmlroot ;
521     }
522 
523     topchild = xmlroot->xmlChildrenNode ;
524 
525     while ( topchild != NULL )
526     {
527         if ( !xmlStrcmp(topchild->name,parserfor) )
528         {
529             if ( xml_check_attribute_value(topchild,attributename,attributevalue) == TRUE )
530             {
531                  xmlretval = topchild;
532                  return xmlretval;
533             }
534             else
535             {
536                 /*No need to parse children node*/
537                 topchild = topchild->next ;
538                 continue ;
539             }
540         }
541 
542         xml_parse_children_of_node(topchild , parserfor, attributename, attributevalue) ;
543 
544         topchild = topchild->next ;
545     }
546 
547     return xmlretval ;
548 }
549 
550 static void
xml_parse_children_of_node(xmlNodePtr parent,xmlChar * parserfor,xmlChar * attributename,xmlChar * attributevalue)551 xml_parse_children_of_node(xmlNodePtr parent,
552                            xmlChar* parserfor,
553                            xmlChar* attributename,
554                            xmlChar* attributevalue )
555 {
556     xmlNodePtr child = parent->xmlChildrenNode ;
557 
558     while ( child != NULL )
559     {
560         if ( !xmlStrcmp(child->name,parserfor))
561         {
562             if ( xml_check_attribute_value(child,attributename,attributevalue) == TRUE )
563             {
564                  xmlretval = child;
565                  return ;
566             }
567             else
568             {
569                 /*No need to parse children node*/
570                 child = child->next ;
571                 continue ;
572             }
573         }
574 
575         /*return already if we have xmlretval set*/
576         if ( xmlretval != NULL )
577         {
578             return ;
579         }
580 
581         xml_parse_children_of_node(child,parserfor,attributename,attributevalue) ;
582         child = child->next ;
583     }
584 }
585 
586 static void
xml_free_doc()587 xml_free_doc()
588 {
589     xmlFreeDoc(xmldocument);
590 	xmldocument = NULL;
591 }
592 
593 static gboolean
xml_check_attribute_value(xmlNode * node,xmlChar * attributename,xmlChar * attributevalue)594 xml_check_attribute_value(xmlNode* node,
595                           xmlChar * attributename,
596                           xmlChar* attributevalue)
597 {
598     xmlChar* attributefromfile ;
599     if ( attributename == NULL || attributevalue == NULL )
600     {
601          return TRUE ;
602     }
603     else if ( !xmlStrcmp(( attributefromfile = xmlGetProp(node,attributename)),
604                            attributevalue) )
605     {
606         xmlFree(attributefromfile);
607         return TRUE ;
608     }
609     xmlFree(attributefromfile);
610     return FALSE ;
611 }
612 
613 static xmlChar*
xml_get_data_from_node(xmlNodePtr node,xmlParseReturnType rettype,xmlChar * attributename)614 xml_get_data_from_node(xmlNodePtr node,
615                        xmlParseReturnType rettype,
616                        xmlChar* attributename)
617 {
618     xmlChar* datastring ;
619     if ( rettype == XML_ATTRIBUTE )
620        datastring= xmlGetProp(node,attributename);
621     else
622        datastring= xmlNodeListGetString(xmldocument,node->xmlChildrenNode, 1);
623 
624     return datastring;
625 }
626 
627 static gboolean
check_mime_type(const gchar * uri,GError ** error)628 check_mime_type(const gchar* uri,GError** error)
629 {
630     GError * err = NULL ;
631     const gchar* mimeFromFile = ev_file_get_mime_type(uri,FALSE,&err);
632 
633     gchar* mimetypes[] = {"application/epub+zip","application/x-booki+zip"};
634     int typecount = 2;
635     if ( !mimeFromFile )
636     {
637         if (err)    {
638             g_propagate_error (error, err);
639         }
640         else    {
641             g_set_error_literal (error,
642                          EV_DOCUMENT_ERROR,
643                          EV_DOCUMENT_ERROR_INVALID,
644                          _("Unknown MIME Type"));
645         }
646         return FALSE;
647     }
648     else
649     {
650         int i=0;
651         for (i=0; i < typecount ;i++) {
652            if ( g_strcmp0(mimeFromFile, mimetypes[i]) == 0  ) {
653                 return TRUE;
654            }
655         }
656 
657         /*We didn't find a match*/
658         g_set_error_literal (error,
659                      EV_DOCUMENT_ERROR,
660                      EV_DOCUMENT_ERROR_INVALID,
661                      _("Not an ePub document"));
662 
663         return FALSE;
664     }
665 }
666 
667 static gboolean
extract_one_file(EpubDocument * epub_document,GError ** error)668 extract_one_file(EpubDocument* epub_document,GError ** error)
669 {
670     GFile * outfile ;
671     gsize writesize = 0;
672     GString * gfilepath ;
673     unz_file_info64 info ;
674     gchar* directory;
675 	GString* dir_create;
676     GFileOutputStream * outstream ;
677 
678     if ( unzOpenCurrentFile(epub_document->epubDocument) != UNZ_OK )
679     {
680             return FALSE ;
681     }
682 
683     gboolean result = TRUE;
684 
685     gpointer currentfilename = g_malloc0(512);
686     unzGetCurrentFileInfo64(epub_document->epubDocument,&info,currentfilename,512,NULL,0,NULL,0) ;
687     directory = g_strrstr(currentfilename,"/") ;
688 
689     if ( directory != NULL )
690         directory++;
691 
692     gfilepath = g_string_new(epub_document->tmp_archive_dir) ;
693     g_string_append_printf(gfilepath,"/%s",(gchar*)currentfilename);
694 
695     // handle the html extension (IssueID #266)
696     if (g_strrstr(currentfilename, ".html") != NULL)
697         g_string_insert_c (gfilepath, gfilepath->len-4, 'x');
698 
699     /*if we encounter a directory, make a directory inside our temporary folder.*/
700     if (directory != NULL && *directory == '\0')
701     {
702         g_mkdir(gfilepath->str,0777);
703         goto out;
704     }
705     else if (directory != NULL && *directory != '\0' ) {
706         gchar* createdir = currentfilename;
707         /*Since a substring can't be longer than the parent string, allocating space equal to the parent's size should suffice*/
708         gchar *createdirname = g_malloc0(strlen(currentfilename));
709         /* Add the name of the directory and subdirectories,if any to a buffer and then create it */
710         gchar *createdirnametemp = createdirname;
711         while ( createdir != directory ) {
712             (*createdirnametemp) = (*createdir);
713             createdirnametemp++;
714             createdir++;
715         }
716         (*createdirnametemp) = '\0';
717 
718         dir_create = g_string_new(epub_document->tmp_archive_dir);
719         g_string_append_printf(dir_create,"/%s",createdirname);
720         g_free(createdirname);
721 
722         g_mkdir_with_parents(dir_create->str,0777);
723 		g_string_free(dir_create,TRUE);
724     }
725 
726     outfile = g_file_new_for_path(gfilepath->str);
727     outstream = g_file_create(outfile,G_FILE_CREATE_PRIVATE,NULL,error);
728     gpointer buffer = g_malloc0(512);
729     while ( (writesize = unzReadCurrentFile(epub_document->epubDocument,buffer,512) ) != 0 )
730     {
731         if ( g_output_stream_write((GOutputStream*)outstream,buffer,writesize,NULL,error) == -1 )
732         {
733             result = FALSE;
734             break;
735         }
736     }
737     g_free(buffer);
738     g_output_stream_close((GOutputStream*)outstream,NULL,error);
739     g_object_unref(outfile) ;
740     g_object_unref(outstream) ;
741 
742 out:
743     unzCloseCurrentFile (epub_document->epubDocument) ;
744     g_string_free(gfilepath,TRUE);
745     g_free(currentfilename);
746 	return result;
747 }
748 
749 static gboolean
extract_epub_from_container(const gchar * uri,EpubDocument * epub_document,GError ** error)750 extract_epub_from_container (const gchar* uri,
751                              EpubDocument *epub_document,
752                              GError ** error)
753 {
754     GError *err = NULL;
755     epub_document->archivename = g_filename_from_uri(uri,NULL,error);
756 
757     if ( !epub_document->archivename )
758     {
759         if (err) {
760             g_propagate_error (error, err);
761         }
762         else {
763             g_set_error_literal (error,
764                          EV_DOCUMENT_ERROR,
765                          EV_DOCUMENT_ERROR_INVALID,
766                          _("could not retrieve filename"));
767         }
768         return FALSE;
769     }
770 
771     gchar *epubfilename = g_strrstr(epub_document->archivename,"/");
772     if ( *epubfilename == '/' )
773         epubfilename++ ;
774 
775     GString *temporary_sub_directory = g_string_new(epubfilename);
776     g_string_append(temporary_sub_directory,"XXXXXX") ;
777     epub_document->tmp_archive_dir = ev_mkdtemp(temporary_sub_directory->str, error);
778     g_string_free(temporary_sub_directory, TRUE);
779 
780     if (!epub_document->tmp_archive_dir) {
781         return FALSE;
782     }
783 
784     epub_document->epubDocument = unzOpen64(epub_document->archivename);
785     if ( epub_document->epubDocument == NULL )
786     {
787         if (err)    {
788             g_propagate_error (error, err);
789         }
790         else    {
791             g_set_error_literal (error,
792                          EV_DOCUMENT_ERROR,
793                          EV_DOCUMENT_ERROR_INVALID,
794                          _("could not open archive"));
795         }
796         return FALSE;
797     }
798 
799     gboolean result = FALSE;
800 
801     if ( unzGoToFirstFile(epub_document->epubDocument) != UNZ_OK )
802     {
803         if (err) {
804             g_propagate_error (error, err);
805         }
806         else    {
807             g_set_error_literal (error,
808                          EV_DOCUMENT_ERROR,
809                          EV_DOCUMENT_ERROR_INVALID,
810                          _("could not extract archive"));
811         }
812         goto out;
813     }
814 
815     while ( TRUE )
816     {
817         if ( extract_one_file(epub_document,&err) == FALSE )
818         {
819             if (err) {
820                 g_propagate_error (error, err);
821             }
822             else    {
823                 g_set_error_literal (error,
824                              EV_DOCUMENT_ERROR,
825                              EV_DOCUMENT_ERROR_INVALID,
826                              _("could not extract archive"));
827             }
828 			goto out;
829         }
830 
831         if ( unzGoToNextFile(epub_document->epubDocument) == UNZ_END_OF_LIST_OF_FILE ) {
832             result = TRUE;
833             break;
834         }
835     }
836 
837 out:
838     unzClose(epub_document->epubDocument);
839     return result;
840 }
841 
842 static gchar*
get_uri_to_content(const gchar * uri,GError ** error,EpubDocument * epub_document)843 get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document)
844 {
845 	gchar* tmp_archive_dir = epub_document->tmp_archive_dir;
846     GError *err = NULL ;
847 
848     gchar *containerpath = g_filename_from_uri(uri,NULL,&err);
849     if ( !containerpath )
850     {
851         if (err) {
852             g_propagate_error (error,err);
853         }
854         else    {
855             g_set_error_literal (error,
856                                  EV_DOCUMENT_ERROR,
857                                  EV_DOCUMENT_ERROR_INVALID,
858                                  _("could not retrieve container file"));
859         }
860         return NULL ;
861     }
862 
863     gboolean result = open_xml_document(containerpath);
864     g_free (containerpath);
865     if ( result == FALSE )
866     {
867         g_set_error_literal(error,
868                             EV_DOCUMENT_ERROR,
869                             EV_DOCUMENT_ERROR_INVALID,
870                             _("could not open container file"));
871 
872         return NULL ;
873     }
874 
875     if ( set_xml_root_node((xmlChar*)"container") == FALSE)  {
876 
877         g_set_error_literal(error,
878                             EV_DOCUMENT_ERROR,
879                             EV_DOCUMENT_ERROR_INVALID,
880                             _("container file is corrupt"));
881         return NULL ;
882     }
883 
884     xmlNodePtr rootfileNode = xml_get_pointer_to_node((xmlChar*)"rootfile",(xmlChar*)"media-type",(xmlChar*)"application/oebps-package+xml");
885     if ( rootfileNode == NULL)
886     {
887         g_set_error_literal(error,
888                             EV_DOCUMENT_ERROR,
889                             EV_DOCUMENT_ERROR_INVALID,
890                             _("epub file is invalid or corrupt"));
891         return NULL ;
892     }
893 
894     xmlChar *relativepath = xml_get_data_from_node(rootfileNode,XML_ATTRIBUTE,(xmlChar*)"full-path") ;
895     if ( relativepath == NULL )
896     {
897         g_set_error_literal(error,
898                             EV_DOCUMENT_ERROR,
899                             EV_DOCUMENT_ERROR_INVALID,
900                             _("epub file is corrupt, no container"));
901         return NULL ;
902     }
903 
904 	gchar* documentfolder = g_strrstr((gchar*)relativepath,"/");
905 	if (documentfolder != NULL) {
906 		gchar* copybuffer = (gchar*)relativepath ;
907 		gchar* directorybuffer = g_malloc0(sizeof(gchar*)*100);
908 		gchar* writer = directorybuffer;
909 
910 		while(copybuffer != documentfolder) {
911 			(*writer) = (*copybuffer);
912 			writer++;copybuffer++;
913 		}
914 		*writer = '\0';
915 
916 		GString *documentdir = g_string_new(tmp_archive_dir);
917 		g_string_append_printf(documentdir,"/%s",directorybuffer);
918 		g_free(directorybuffer);
919 		epub_document->documentdir = g_string_free(documentdir,FALSE);
920 	}
921 	else
922 	{
923 		epub_document->documentdir = g_strdup(tmp_archive_dir);
924 	}
925 
926 	GString *absolutepath = g_string_new(tmp_archive_dir);
927     g_string_append_printf(absolutepath,"/%s",relativepath);
928     g_free (relativepath);
929 
930     gchar *content_uri = g_filename_to_uri(absolutepath->str,NULL,&err);
931     g_string_free(absolutepath,TRUE);
932     if ( !content_uri )  {
933         if (err) {
934             g_propagate_error (error,err);
935         }
936         else
937         {
938             g_set_error_literal (error,
939                                  EV_DOCUMENT_ERROR,
940                                  EV_DOCUMENT_ERROR_INVALID,
941                                  _("could not retrieve container file"));
942         }
943         return NULL ;
944     }
945 	xml_free_doc();
946     return content_uri ;
947 }
948 
949 static gboolean
link_present_on_page(const gchar * link,const gchar * page_uri)950 link_present_on_page(const gchar* link,const gchar *page_uri)
951 {
952 	gchar *res;
953 	if ((res=g_strrstr(link, page_uri)) != NULL) {
954 		return TRUE;
955 	}
956 	else {
957 		return FALSE;
958 	}
959 }
960 
961 static void
check_add_page_numbers(linknode * listdata,contentListNode * comparenode)962 check_add_page_numbers(linknode *listdata, contentListNode *comparenode)
963 {
964     if (link_present_on_page(listdata->pagelink, comparenode->value)) {
965 		listdata->page = comparenode->index - 1;
966 	}
967     if (listdata->children != NULL) {
968         g_list_foreach(listdata->children,(GFunc)check_add_page_numbers,comparenode);
969     }
970 }
971 
972 static GList*
setup_document_content_list(const gchar * content_uri,GError ** error,gchar * documentdir)973 setup_document_content_list(const gchar* content_uri, GError** error,gchar *documentdir)
974 {
975     GError *err = NULL;
976     gint indexcounter = 1;
977     xmlNodePtr manifest,spine,itemrefptr,itemptr;
978     gboolean errorflag = FALSE;
979 
980     if ( open_xml_document(content_uri) == FALSE )
981     {
982         g_set_error_literal(error,
983                             EV_DOCUMENT_ERROR,
984                             EV_DOCUMENT_ERROR_INVALID,
985                             _("could not parse content manifest"));
986 
987         return FALSE ;
988     }
989     if ( set_xml_root_node((xmlChar*)"package") == FALSE)  {
990 
991         g_set_error_literal(error,
992                             EV_DOCUMENT_ERROR,
993                             EV_DOCUMENT_ERROR_INVALID,
994                             _("content file is invalid"));
995         return FALSE ;
996     }
997 
998     if ( ( spine = xml_get_pointer_to_node((xmlChar*)"spine",NULL,NULL) )== NULL )
999     {
1000          g_set_error_literal(error,
1001                             EV_DOCUMENT_ERROR,
1002                             EV_DOCUMENT_ERROR_INVALID,
1003                             _("epub file has no spine"));
1004         return FALSE ;
1005     }
1006 
1007     if ( ( manifest = xml_get_pointer_to_node((xmlChar*)"manifest",NULL,NULL) )== NULL )
1008     {
1009          g_set_error_literal(error,
1010                             EV_DOCUMENT_ERROR,
1011                             EV_DOCUMENT_ERROR_INVALID,
1012                             _("epub file has no manifest"));
1013         return FALSE ;
1014     }
1015 
1016     xmlretval = NULL ;
1017 
1018     /*Get first instance of itemref from the spine*/
1019     xml_parse_children_of_node(spine,(xmlChar*)"itemref",NULL,NULL);
1020 
1021     if ( xmlretval != NULL )
1022         itemrefptr = xmlretval ;
1023     else
1024     {
1025         errorflag=TRUE;
1026     }
1027 
1028     GList *newlist = NULL;
1029 
1030     /*Parse the spine for remaining itemrefs*/
1031     do
1032     {
1033         /*for the first time that we enter the loop, if errorflag is set we break*/
1034         if ( errorflag )
1035         {
1036             break;
1037         }
1038         if ( xmlStrcmp(itemrefptr->name,(xmlChar*)"itemref") == 0)
1039         {
1040             contentListNode *newnode = g_malloc0(sizeof(newnode));
1041             newnode->key = (gchar*)xml_get_data_from_node(itemrefptr,XML_ATTRIBUTE,(xmlChar*)"idref");
1042             if ( newnode->key == NULL )
1043             {
1044                 g_free (newnode);
1045                 errorflag = TRUE;
1046                 break;
1047             }
1048             xmlretval=NULL ;
1049             xml_parse_children_of_node(manifest,(xmlChar*)"item",(xmlChar*)"id",(xmlChar*)newnode->key);
1050 
1051             if ( xmlretval != NULL )
1052             {
1053                 itemptr = xmlretval ;
1054             }
1055             else
1056             {
1057                 g_free (newnode->key);
1058                 g_free (newnode);
1059                 errorflag = TRUE;
1060                 break;
1061             }
1062 
1063 
1064             GString* absolutepath = g_string_new(documentdir);
1065             gchar *relativepath = (gchar*)xml_get_data_from_node(itemptr,XML_ATTRIBUTE,(xmlChar*)"href");
1066             g_string_append_printf(absolutepath,"/%s",relativepath);
1067 
1068             // Handle the html extension (IssueID #266)
1069            if (g_strrstr(relativepath, ".html") != NULL)
1070                 g_string_insert_c (absolutepath, absolutepath->len-4, 'x');
1071             g_free (relativepath);
1072 
1073             newnode->value = g_filename_to_uri(absolutepath->str,NULL,&err);
1074             g_string_free(absolutepath, TRUE);
1075 
1076             if ( newnode->value == NULL )
1077             {
1078                 g_free (newnode->key);
1079                 g_free (newnode);
1080                 errorflag = TRUE;
1081                 break;
1082             }
1083 
1084 			newnode->index = indexcounter++ ;
1085 
1086             newlist = g_list_prepend(newlist, newnode);
1087         }
1088         itemrefptr = itemrefptr->next ;
1089     }
1090     while ( itemrefptr != NULL );
1091 
1092     if ( errorflag )
1093     {
1094         if ( err )
1095         {
1096             g_propagate_error(error,err);
1097         }
1098         else
1099         {
1100             g_set_error_literal(error,
1101                                 EV_DOCUMENT_ERROR,
1102                                 EV_DOCUMENT_ERROR_INVALID,
1103                                 _("Could not set up document tree for loading, some files missing"));
1104         }
1105         /*free any nodes that were set up and return empty*/
1106         g_list_free_full(newlist, (GDestroyNotify)free_tree_nodes);
1107         return NULL;
1108     }
1109 
1110 	newlist = g_list_reverse(newlist);
1111 	xml_free_doc();
1112     return newlist;
1113 }
1114 
1115 /* Callback function to free the contentlist.*/
1116 static void
free_tree_nodes(gpointer data)1117 free_tree_nodes(gpointer data)
1118 {
1119     contentListNode* dataptr = data ;
1120     g_free(dataptr->value);
1121     g_free(dataptr->key);
1122     g_free(dataptr);
1123 }
1124 
1125 static void
free_link_nodes(gpointer data)1126 free_link_nodes(gpointer data)
1127 {
1128     linknode* dataptr = data ;
1129     g_free(dataptr->pagelink);
1130     g_free(dataptr->linktext);
1131 
1132 	if (dataptr->children) {
1133 		g_list_free_full(dataptr->children,(GDestroyNotify)free_link_nodes);
1134 	}
1135     g_free(dataptr);
1136 }
1137 
1138 static gchar*
get_toc_file_name(gchar * containeruri)1139 get_toc_file_name(gchar *containeruri)
1140 {
1141 	gchar *containerfilename = g_filename_from_uri(containeruri,NULL,NULL);
1142 	open_xml_document(containerfilename);
1143 	g_free (containerfilename);
1144 
1145 	set_xml_root_node(NULL);
1146 
1147 	xmlNodePtr manifest = xml_get_pointer_to_node((xmlChar*)"manifest",NULL,NULL);
1148 	xmlNodePtr spine = xml_get_pointer_to_node((xmlChar*)"spine",NULL,NULL);
1149 
1150 	xmlChar *ncx = xml_get_data_from_node(spine,XML_ATTRIBUTE,(xmlChar*)"toc");
1151 
1152     /*In an epub3, there is sometimes no toc, and we need to then use the nav file for this.*/
1153     if (ncx == NULL) {
1154         return NULL;
1155     }
1156 
1157 	xmlretval = NULL;
1158 	xml_parse_children_of_node(manifest,(xmlChar*)"item",(xmlChar*)"id",ncx);
1159 
1160 	gchar* tocfilename = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href");
1161 	xml_free_doc();
1162 
1163 	return tocfilename;
1164 }
1165 
1166 static gchar*
epub_document_get_nav_file(gchar * containeruri)1167 epub_document_get_nav_file(gchar* containeruri)
1168 {
1169     open_xml_document(containeruri);
1170     set_xml_root_node(NULL);
1171     xmlNodePtr manifest = xml_get_pointer_to_node((xmlChar*)"manifest",NULL,NULL);
1172     xmlretval = NULL;
1173     xml_parse_children_of_node(manifest,(xmlChar*)"item",(xmlChar*)"properties",(xmlChar*)"nav");
1174 
1175     gchar *uri = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE, (xmlChar*)"href");
1176 
1177     xml_free_doc();
1178     return uri;
1179 }
1180 
1181 static GList*
get_child_list(xmlNodePtr ol,gchar * documentdir)1182 get_child_list(xmlNodePtr ol,gchar* documentdir)
1183 {
1184     GList *childlist = NULL;
1185     xmlNodePtr li = ol->xmlChildrenNode;
1186 
1187     while (li != NULL) {
1188 		if (xmlStrcmp(li->name,(xmlChar*)"li")) {
1189 			li = li->next;
1190 			continue;
1191 		}
1192         xmlNodePtr children = li->xmlChildrenNode;
1193         linknode *newlinknode = g_new0(linknode, 1);
1194         while (children != NULL) {
1195             if ( !xmlStrcmp(children->name,(xmlChar*)"a")) {
1196                 newlinknode->linktext = (gchar*)xml_get_data_from_node(children,XML_KEYWORD,NULL);
1197                 gchar* filename = (gchar*)xml_get_data_from_node(children,XML_ATTRIBUTE,(xmlChar*)"href");
1198 				gchar *filepath = g_strdup_printf("%s/%s",documentdir,filename);
1199 				newlinknode->pagelink = g_filename_to_uri(filepath,NULL,NULL);
1200 				g_free(filename);
1201 				g_free(filepath);
1202                 newlinknode->children = NULL;
1203                 childlist = g_list_prepend(childlist,newlinknode);
1204             }
1205             else if ( !xmlStrcmp(children->name,(xmlChar*)"ol")){
1206                 newlinknode->children = get_child_list(children,documentdir);
1207             }
1208 
1209 			children = children->next;
1210         }
1211 
1212         li = li->next;
1213     }
1214 
1215     return g_list_reverse(childlist);
1216 }
1217 
1218 /* For an epub3 style navfile */
1219 static GList*
setup_index_from_navfile(gchar * tocpath)1220 setup_index_from_navfile(gchar *tocpath)
1221 {
1222     GList *index = NULL;
1223     open_xml_document(tocpath);
1224     set_xml_root_node(NULL);
1225     xmlNodePtr nav = xml_get_pointer_to_node((xmlChar*)"nav",(xmlChar*)"id",(xmlChar*)"toc");
1226     xmlretval=NULL;
1227     xml_parse_children_of_node(nav,(xmlChar*)"ol", NULL,NULL);
1228 	gchar *navdirend = g_strrstr(tocpath,"/");
1229 	gchar *navdir = g_malloc0(strlen(tocpath));
1230 	gchar *reader = tocpath;
1231 	gchar *writer = navdir;
1232 
1233 	while (reader != navdirend) {
1234 		(*writer) = (*reader) ;
1235 		writer++;reader++;
1236 	}
1237     index = get_child_list(xmlretval,navdir);
1238 	g_free(navdir);
1239     xml_free_doc();
1240     return index;
1241 }
1242 
1243 static GList*
setup_document_children(EpubDocument * epub_document,xmlNodePtr node)1244 setup_document_children(EpubDocument *epub_document,xmlNodePtr node)
1245 {
1246     GList *index = NULL;
1247 
1248     xmlretval = NULL;
1249     xml_parse_children_of_node(node,(xmlChar*)"navPoint",NULL,NULL);
1250     xmlNodePtr navPoint = xmlretval;
1251 
1252     while(navPoint != NULL) {
1253 
1254         if ( !xmlStrcmp(navPoint->name,(xmlChar*)"navPoint")) {
1255     		xmlretval = NULL;
1256     		xml_parse_children_of_node(navPoint,(xmlChar*)"navLabel",NULL,NULL);
1257     		xmlNodePtr navLabel = xmlretval;
1258     		xmlretval = NULL;
1259     		gchar *fragment=NULL,*end=NULL;
1260     		GString *uri = NULL;
1261             GString *pagelink = NULL;
1262 
1263     		xml_parse_children_of_node(navLabel,(xmlChar*)"text",NULL,NULL);
1264 
1265             linknode *newnode = g_new0(linknode,1);
1266             newnode->linktext = NULL;
1267             while (newnode->linktext == NULL) {
1268 	            newnode->linktext = (gchar*)xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL);
1269 	            xmlretval = xmlretval->next;
1270             }
1271 
1272             xmlretval = NULL;
1273             xml_parse_children_of_node(navPoint,(xmlChar*)"content",NULL,NULL);
1274             pagelink = g_string_new(epub_document->documentdir);
1275             newnode->pagelink = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"src");
1276             g_string_append_printf(pagelink,"/%s",newnode->pagelink);
1277 
1278             xmlFree(newnode->pagelink);
1279 
1280             gchar *escaped = g_strdup(pagelink->str);
1281 
1282             //unescaping any special characters
1283             pagelink->str = g_uri_unescape_string (escaped,NULL);
1284             g_free(escaped);
1285 
1286             // cut off fragment after '#', only in the last segment of path
1287             if ((end = g_strrstr(pagelink->str,"#")) != NULL && (end > g_strrstr(pagelink->str,"/"))) {
1288 	            fragment = g_strdup(g_strrstr(pagelink->str,"#"));
1289 	            *end = '\0';
1290             }
1291 
1292             uri = g_string_new(g_filename_to_uri(pagelink->str,NULL,NULL));
1293 
1294             // handle the html extension (IssueID #266)
1295             if (g_strrstr(uri->str, ".html") != NULL)
1296                 g_string_insert_c (uri, uri->len-4, 'x');
1297 
1298             g_string_free(pagelink,TRUE);
1299 
1300             if (fragment) {
1301 	            g_string_append(uri,fragment);
1302             }
1303 
1304             newnode->pagelink = g_strdup(uri->str);
1305             newnode->children = setup_document_children(epub_document, navPoint);
1306             g_string_free(uri,TRUE);
1307             index = g_list_prepend(index,newnode);
1308         }
1309 
1310         navPoint = navPoint->next;
1311     }
1312 
1313     return g_list_reverse (index);
1314 }
1315 
1316 static GList*
setup_document_index(EpubDocument * epub_document,gchar * containeruri)1317 setup_document_index(EpubDocument *epub_document,gchar *containeruri)
1318 {
1319     GString *tocpath = g_string_new(epub_document->documentdir);
1320     gchar *tocfilename = get_toc_file_name(containeruri);
1321     GList *index = NULL;
1322 
1323     if (tocfilename == NULL) {
1324         tocfilename = epub_document_get_nav_file(containeruri);
1325 
1326         //Apparently, sometimes authors don't even care to add a TOC!! Guess standards are just guidelines.
1327 
1328         if (tocfilename == NULL) {
1329             //We didn't even find a nav file.The document has no TOC.
1330             g_string_free(tocpath,TRUE);
1331             return NULL;
1332         }
1333 
1334         g_string_append_printf (tocpath,"/%s",tocfilename);
1335         index = setup_index_from_navfile(tocpath->str);
1336         g_string_free(tocpath,TRUE);
1337         g_free (tocfilename);
1338         return index;
1339     }
1340 
1341     g_string_append_printf (tocpath,"/%s",tocfilename);
1342     g_free (tocfilename);
1343 
1344     open_xml_document(tocpath->str);
1345     g_string_free(tocpath,TRUE);
1346     set_xml_root_node((xmlChar*)"ncx");
1347 
1348 	xmlNodePtr docTitle = xml_get_pointer_to_node((xmlChar*)"docTitle",NULL,NULL);
1349 	xmlretval = NULL;
1350 	xml_parse_children_of_node(docTitle,(xmlChar*)"text",NULL,NULL);
1351 
1352 	while (epub_document->docTitle == NULL && xmlretval != NULL) {
1353 		epub_document->docTitle = (gchar*)xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL);
1354 		xmlretval = xmlretval->next;
1355 	}
1356     xmlNodePtr navMap = xml_get_pointer_to_node((xmlChar*)"navMap",NULL,NULL);
1357     index = setup_document_children (epub_document, navMap);
1358 
1359 	xml_free_doc();
1360     return index;
1361 }
1362 
1363 static EvDocumentInfo*
epub_document_get_info(EvDocument * document)1364 epub_document_get_info(EvDocument *document)
1365 {
1366 	EpubDocument *epub_document = EPUB_DOCUMENT(document);
1367 	GError *error = NULL ;
1368 	gchar* infofile ;
1369 	xmlNodePtr metanode ;
1370 	GString* buffer ;
1371 
1372 	GString* containerpath = g_string_new(epub_document->tmp_archive_dir);
1373 	g_string_append_printf(containerpath,"/META-INF/container.xml");
1374 	gchar* containeruri = g_filename_to_uri(containerpath->str,NULL,&error);
1375 	g_string_free (containerpath, TRUE);
1376 	if ( error )
1377 	{
1378 		return NULL ;
1379 	}
1380 
1381 	gchar* uri = get_uri_to_content (containeruri,&error,epub_document);
1382 	g_free (containeruri);
1383 	if ( error )
1384 	{
1385 		return NULL ;
1386 	}
1387 
1388 	EvDocumentInfo* epubinfo = g_new0 (EvDocumentInfo, 1);
1389 
1390 	epubinfo->fields_mask = EV_DOCUMENT_INFO_TITLE |
1391 			    EV_DOCUMENT_INFO_FORMAT |
1392 			    EV_DOCUMENT_INFO_AUTHOR |
1393 			    EV_DOCUMENT_INFO_SUBJECT |
1394 			    EV_DOCUMENT_INFO_KEYWORDS |
1395 			    EV_DOCUMENT_INFO_LAYOUT |
1396 			    EV_DOCUMENT_INFO_CREATOR |
1397 			    EV_DOCUMENT_INFO_LINEARIZED |
1398 				EV_DOCUMENT_INFO_PERMISSIONS |
1399 			    EV_DOCUMENT_INFO_N_PAGES ;
1400 
1401 	infofile = g_filename_from_uri(uri,NULL,&error);
1402 	g_free (uri);
1403 	if ( error )
1404 	{
1405 		return epubinfo;
1406 	}
1407 
1408 	open_xml_document(infofile);
1409 	g_free (infofile);
1410 
1411 	set_xml_root_node((xmlChar*)"package");
1412 
1413 	metanode = xml_get_pointer_to_node((xmlChar*)"title",NULL,NULL);
1414 	if ( metanode == NULL )
1415 	  epubinfo->title = NULL ;
1416 	else
1417 	  epubinfo->title = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
1418 
1419 	metanode = xml_get_pointer_to_node((xmlChar*)"creator",NULL,NULL);
1420 	if ( metanode == NULL )
1421 	  epubinfo->author = g_strdup("unknown");
1422 	else
1423 	  epubinfo->author = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
1424 
1425 	metanode = xml_get_pointer_to_node((xmlChar*)"subject",NULL,NULL);
1426 	if ( metanode == NULL )
1427 	   epubinfo->subject = g_strdup("unknown");
1428 	else
1429 	   epubinfo->subject = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
1430 
1431 	buffer = g_string_new((gchar*)xml_get_data_from_node (xmlroot,XML_ATTRIBUTE,(xmlChar*)"version"));
1432 	g_string_prepend(buffer,"epub ");
1433 	epubinfo->format = g_string_free(buffer,FALSE);
1434 
1435 	/*FIXME: Add more of these as you write the corresponding modules*/
1436 
1437 	epubinfo->layout = EV_DOCUMENT_LAYOUT_SINGLE_PAGE;
1438 
1439 	metanode = xml_get_pointer_to_node((xmlChar*)"publisher",NULL,NULL);
1440 	if ( metanode == NULL )
1441 	   epubinfo->creator = g_strdup("unknown");
1442 	else
1443 	   epubinfo->creator = (char*)xml_get_data_from_node(metanode,XML_KEYWORD,NULL);
1444 
1445 	/* number of pages */
1446 	epubinfo->n_pages = epub_document_get_n_pages(document);
1447 
1448 	/*Copying*/
1449 	epubinfo->permissions = EV_DOCUMENT_PERMISSIONS_OK_TO_COPY;
1450 	/*TODO : Add a function to get date*/
1451 
1452 	if (xmldocument)
1453 		xml_free_doc();
1454 	return epubinfo ;
1455 }
1456 
1457 static EvPage*
epub_document_get_page(EvDocument * document,gint index)1458 epub_document_get_page(EvDocument *document,
1459                        gint index)
1460 {
1461 	EpubDocument *epub_document = EPUB_DOCUMENT(document);
1462 	EvPage* page = ev_page_new(index);
1463 	contentListNode *listptr = g_list_nth_data (epub_document->contentList,index);
1464 	page->backend_page = g_strdup(listptr->value);
1465 
1466 	return page ;
1467 }
1468 
1469 static void
change_to_night_sheet(contentListNode * nodedata,gpointer user_data)1470 change_to_night_sheet(contentListNode *nodedata,gpointer user_data)
1471 {
1472     gchar *filename = g_filename_from_uri(nodedata->value,NULL,NULL);
1473     open_xml_document(filename);
1474     set_xml_root_node(NULL);
1475     xmlNodePtr head =xml_get_pointer_to_node((xmlChar*)"head",NULL,NULL);
1476 	gchar *class = NULL;
1477     xmlretval = NULL;
1478     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"rel",(xmlChar*)"stylesheet");
1479 
1480     xmlNodePtr day = xmlretval;
1481 	if ( (class = (gchar*)xml_get_data_from_node(day,XML_ATTRIBUTE,(xmlChar*)"class")) == NULL) {
1482 		xmlSetProp(day,(xmlChar*)"class",(xmlChar*)"day");
1483 	}
1484 	g_free(class);
1485     xmlSetProp(day,(xmlChar*)"rel",(xmlChar*)"alternate stylesheet");
1486     xmlretval = NULL;
1487     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"class",(xmlChar*)"night");
1488     xmlSetProp(xmlretval,(xmlChar*)"rel",(xmlChar*)"stylesheet");
1489     xmlSaveFormatFile (filename, xmldocument, 0);
1490     xml_free_doc();
1491     g_free(filename);
1492 }
1493 
1494 static void
change_to_day_sheet(contentListNode * nodedata,gpointer user_data)1495 change_to_day_sheet(contentListNode *nodedata,gpointer user_data)
1496 {
1497     gchar *filename = g_filename_from_uri(nodedata->value,NULL,NULL);
1498     open_xml_document(filename);
1499     set_xml_root_node(NULL);
1500     xmlNodePtr head =xml_get_pointer_to_node((xmlChar*)"head",NULL,NULL);
1501 
1502     xmlretval = NULL;
1503     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"rel",(xmlChar*)"stylesheet");
1504 
1505     xmlNodePtr day = xmlretval;
1506     xmlSetProp(day,(xmlChar*)"rel",(xmlChar*)"alternate stylesheet");
1507 
1508     xmlretval = NULL;
1509     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"class",(xmlChar*)"day");
1510     xmlSetProp(xmlretval,(xmlChar*)"rel",(xmlChar*)"stylesheet");
1511     xmlSaveFormatFile (filename, xmldocument, 0);
1512     xml_free_doc();
1513     g_free(filename);
1514 }
1515 
1516 static gchar*
epub_document_get_alternate_stylesheet(gchar * docuri)1517 epub_document_get_alternate_stylesheet(gchar *docuri)
1518 {
1519     gchar *filename = g_filename_from_uri(docuri,NULL,NULL);
1520     open_xml_document(filename);
1521     g_free(filename);
1522 
1523     set_xml_root_node(NULL);
1524 
1525     xmlNodePtr head= xml_get_pointer_to_node((xmlChar*)"head",NULL,NULL);
1526 
1527     xmlretval = NULL;
1528 
1529     xml_parse_children_of_node(head,(xmlChar*)"link",(xmlChar*)"class",(xmlChar*)"night");
1530 
1531     if (xmlretval != NULL) {
1532         return (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href");
1533     }
1534     xml_free_doc();
1535     return NULL;
1536 }
1537 
1538 static void
add_night_sheet(contentListNode * listdata,gchar * sheet)1539 add_night_sheet(contentListNode *listdata,gchar *sheet)
1540 {
1541     gchar *sheeturi = g_filename_to_uri(sheet,NULL,NULL);
1542     open_xml_document(listdata->value);
1543 
1544     set_xml_root_node(NULL);
1545 
1546     xmlSaveFormatFile (listdata->value, xmldocument, 0);
1547     xml_free_doc();
1548     g_free(sheeturi);
1549 }
1550 
1551 static void
epub_document_check_add_night_sheet(EvDocument * document)1552 epub_document_check_add_night_sheet(EvDocument *document)
1553 {
1554     EpubDocument *epub_document = EPUB_DOCUMENT(document);
1555 
1556     g_return_if_fail(EPUB_IS_DOCUMENT(epub_document));
1557 
1558     /*
1559      * We'll only check the first page for a supplied night mode stylesheet.
1560      * Odds are, if this one has it, all others have it too.
1561      */
1562 	contentListNode *node = epub_document->contentList->data;
1563     gchar* stylesheetfilename = epub_document_get_alternate_stylesheet((gchar*)node->value) ;
1564 
1565     if (stylesheetfilename == NULL) {
1566         gchar *style = "body {color:rgb(255,255,255);\
1567                         background-color:rgb(0,0,0);\
1568                         text-align:justify;\
1569                         line-spacing:1.8;\
1570                         margin-top:0px;\
1571                         margin-bottom:4px;\
1572                         margin-right:50px;\
1573                         margin-left:50px;\
1574                         text-indent:3em;}\
1575                         h1, h2, h3, h4, h5, h6\
1576                         {color:white;\
1577                         text-align:center;\
1578                         font-style:italic;\
1579                         font-weight:bold;}";
1580 
1581         gchar *csspath = g_strdup_printf("%s/xreadernightstyle.css",epub_document->documentdir);
1582 
1583 
1584         GFile *styles = g_file_new_for_path (csspath);
1585         GOutputStream *outstream = (GOutputStream*)g_file_create(styles,G_FILE_CREATE_PRIVATE,NULL,NULL);
1586         if ( g_output_stream_write((GOutputStream*)outstream,style,strlen(style),NULL,NULL) == -1 )
1587         {
1588             return ;
1589         }
1590         g_output_stream_close((GOutputStream*)outstream,NULL,NULL);
1591         g_object_unref(styles) ;
1592         g_object_unref(outstream) ;
1593         //add this stylesheet to each document, for later.
1594         g_list_foreach(epub_document->contentList,(GFunc)add_night_sheet,csspath);
1595         g_free(csspath);
1596     }
1597     g_free(stylesheetfilename);
1598 }
1599 
1600 static void
epub_document_toggle_night_mode(EvDocument * document,gboolean night)1601 epub_document_toggle_night_mode(EvDocument *document,gboolean night)
1602 {
1603     EpubDocument *epub_document = EPUB_DOCUMENT(document);
1604 
1605     g_return_if_fail(EPUB_IS_DOCUMENT(epub_document));
1606     if (night)
1607         g_list_foreach(epub_document->contentList,(GFunc)change_to_night_sheet,NULL);
1608     else
1609         g_list_foreach(epub_document->contentList,(GFunc)change_to_day_sheet,NULL);
1610 }
1611 
1612 static gchar*
epub_document_set_document_title(gchar * containeruri)1613 epub_document_set_document_title(gchar *containeruri)
1614 {
1615 	open_xml_document(containeruri);
1616 	gchar *doctitle;
1617 	set_xml_root_node(NULL);
1618 
1619 	xmlNodePtr title = xml_get_pointer_to_node((xmlChar*)"title",NULL,NULL);
1620 
1621 	doctitle = (gchar*)xml_get_data_from_node(title, XML_KEYWORD, NULL);
1622 	xml_free_doc();
1623 
1624 	return doctitle;
1625 }
1626 
1627 static void
page_set_function(linknode * Link,GList * contentList)1628 page_set_function(linknode *Link, GList *contentList)
1629 {
1630 	GList *listiter = contentList;
1631 	contentListNode *pagedata;
1632 
1633 	guint flag=0;
1634 	while (!flag && listiter != NULL) {
1635 		pagedata = listiter->data;
1636 		if (link_present_on_page(Link->pagelink, pagedata->value)) {
1637 			flag=1;
1638 			Link->page = pagedata->index - 1;
1639 		}
1640 		listiter = listiter->next;
1641 	}
1642 
1643 	if (Link->children) {
1644 		g_list_foreach(Link->children,(GFunc)page_set_function,contentList);
1645 	}
1646 }
1647 
1648 static void
epub_document_set_index_pages(GList * index,GList * contentList)1649 epub_document_set_index_pages(GList *index,GList *contentList)
1650 {
1651 	g_return_if_fail (index != NULL);
1652 	g_return_if_fail (contentList != NULL);
1653 
1654 	g_list_foreach(index,(GFunc)page_set_function,contentList);
1655 }
1656 
1657 
1658 static void
add_mathjax_script_node_to_file(gchar * filename,gchar * data)1659 add_mathjax_script_node_to_file(gchar *filename, gchar *data)
1660 {
1661 	xmlDocPtr mathdocument = xmlParseFile (filename);
1662 	xmlNodePtr mathroot = xmlDocGetRootElement(mathdocument);
1663 
1664 	if (mathroot == NULL)
1665 		return;
1666 
1667 	xmlNodePtr head = mathroot->children;
1668 
1669 	while(head != NULL) {
1670 		if (!xmlStrcmp(head->name,(xmlChar*)"head")) {
1671 			break;
1672 		}
1673 		head = head->next;
1674 	}
1675 
1676 	if (xmlStrcmp(head->name,(xmlChar*)"head")) {
1677 		return ;
1678 	}
1679 
1680 	xmlNodePtr script = xmlNewTextChild (head,NULL,(xmlChar*)"script",(xmlChar*)"");
1681 	xmlNewProp(script,(xmlChar*)"type",(xmlChar*)"text/javascript");
1682 	xmlNewProp(script,(xmlChar*)"src",(xmlChar*)data);
1683 
1684 	xmlSaveFormatFile(filename, mathdocument, 0);
1685 	xmlFreeDoc (mathdocument);
1686 }
1687 
1688 static void
epub_document_add_mathJax(gchar * containeruri,gchar * documentdir)1689 epub_document_add_mathJax(gchar* containeruri,gchar* documentdir)
1690 {
1691 	gchar *containerfilename= g_filename_from_uri(containeruri,NULL,NULL);
1692 	GString *mathjaxdir = g_string_new(MATHJAX_DIRECTORY);
1693 
1694 	gchar *mathjaxref = g_filename_to_uri(mathjaxdir->str,NULL,NULL);
1695 	gchar *nodedata = g_strdup_printf("%s/MathJax.js?config=TeX-AMS-MML_SVG",mathjaxref);
1696 
1697 	open_xml_document(containerfilename);
1698 	set_xml_root_node(NULL);
1699 	xmlNodePtr manifest = xml_get_pointer_to_node((xmlChar*)"manifest",NULL,NULL);
1700 
1701 	xmlNodePtr item = manifest->xmlChildrenNode;
1702 
1703 	while (item != NULL) {
1704 		if (xmlStrcmp(item->name,(xmlChar*)"item")) {
1705 			item = item->next;
1706 			continue;
1707 		}
1708 
1709 		xmlChar *mathml = xml_get_data_from_node(item,XML_ATTRIBUTE, (xmlChar*)"properties");
1710 
1711 		if (mathml != NULL &&
1712 		    !xmlStrcmp(mathml, (xmlChar*)"mathml") ) {
1713 			gchar *href = (gchar*)xml_get_data_from_node(item, XML_ATTRIBUTE, (xmlChar*)"href");
1714 			gchar *filename = g_strdup_printf("%s/%s",documentdir,href);
1715 
1716 			add_mathjax_script_node_to_file(filename,nodedata);
1717 			g_free(href);
1718 			g_free(filename);
1719 		}
1720 		g_free(mathml);
1721 		item = item->next;
1722 	}
1723 	xml_free_doc();
1724 	g_free(mathjaxref);
1725 	g_free(containerfilename);
1726 	g_free(nodedata);
1727 	g_string_free(mathjaxdir,TRUE);
1728 }
1729 
1730 static gboolean
epub_document_load(EvDocument * document,const char * uri,GError ** error)1731 epub_document_load (EvDocument* document,
1732                     const char* uri,
1733                     GError**    error)
1734 {
1735 	EpubDocument *epub_document = EPUB_DOCUMENT(document);
1736 	GError *err = NULL;
1737 
1738 	if ( check_mime_type (uri, &err) == FALSE )
1739 	{
1740 		/*Error would've been set by the function*/
1741 		g_propagate_error(error,err);
1742 		return FALSE;
1743 	}
1744 
1745 	extract_epub_from_container (uri,epub_document,&err);
1746 
1747 	if ( err )
1748 	{
1749 		g_propagate_error( error,err );
1750 		return FALSE;
1751 	}
1752 
1753 	/*FIXME : can this be different, ever?*/
1754 	GString *containerpath = g_string_new(epub_document->tmp_archive_dir);
1755 	g_string_append_printf(containerpath,"/META-INF/container.xml");
1756 	gchar *containeruri = g_filename_to_uri(containerpath->str,NULL,&err);
1757 	g_string_free (containerpath, TRUE);
1758 
1759 	if ( err )
1760 	{
1761 		g_propagate_error(error,err);
1762 		return FALSE;
1763 	}
1764 
1765 	gchar *contentOpfUri = get_uri_to_content (containeruri,&err,epub_document);
1766 	g_free (containeruri);
1767 
1768 	if ( contentOpfUri == NULL )
1769 	{
1770 		g_propagate_error(error,err);
1771 		return FALSE;
1772 	}
1773 
1774 	epub_document->docTitle = epub_document_set_document_title(contentOpfUri);
1775 	epub_document->index = setup_document_index(epub_document,contentOpfUri);
1776 
1777 	epub_document->contentList = setup_document_content_list (contentOpfUri,&err,epub_document->documentdir);
1778 
1779     if (epub_document->index != NULL && epub_document->contentList != NULL)
1780 	    epub_document_set_index_pages(epub_document->index, epub_document->contentList);
1781 
1782     epub_document_add_mathJax(contentOpfUri,epub_document->documentdir);
1783 	g_free (contentOpfUri);
1784 
1785 	if ( epub_document->contentList == NULL )
1786 	{
1787 		g_propagate_error(error,err);
1788 		return FALSE;
1789 	}
1790 
1791 	return TRUE;
1792 }
1793 
1794 static void
epub_document_init(EpubDocument * epub_document)1795 epub_document_init (EpubDocument *epub_document)
1796 {
1797     epub_document->archivename = NULL ;
1798     epub_document->tmp_archive_dir = NULL ;
1799     epub_document->contentList = NULL ;
1800 	epub_document->documentdir = NULL;
1801 	epub_document->index = NULL;
1802 	epub_document->docTitle = NULL;
1803 }
1804 
1805 
1806 static void
epub_document_finalize(GObject * object)1807 epub_document_finalize (GObject *object)
1808 {
1809 	EpubDocument *epub_document = EPUB_DOCUMENT (object);
1810 
1811 	if (epub_document->epubDocument != NULL) {
1812 		if (epub_remove_temporary_dir (epub_document->tmp_archive_dir) == -1)
1813 			g_warning (_("There was an error deleting “%s”."),
1814 				   epub_document->tmp_archive_dir);
1815 	}
1816 
1817 	if ( epub_document->contentList ) {
1818             g_list_free_full(epub_document->contentList,(GDestroyNotify)free_tree_nodes);
1819 			epub_document->contentList = NULL;
1820 	}
1821 
1822 	if (epub_document->index) {
1823 		g_list_free_full(epub_document->index,(GDestroyNotify)free_link_nodes);
1824 		epub_document->index = NULL;
1825 	}
1826 
1827 	if ( epub_document->tmp_archive_dir) {
1828 		g_free (epub_document->tmp_archive_dir);
1829 		epub_document->tmp_archive_dir = NULL;
1830 	}
1831 
1832 	if (epub_document->docTitle) {
1833 		g_free(epub_document->docTitle);
1834 		epub_document->docTitle = NULL;
1835 	}
1836 	if ( epub_document->archivename) {
1837 		g_free (epub_document->archivename);
1838 		epub_document->archivename = NULL;
1839 	}
1840 	if ( epub_document->documentdir) {
1841 		g_free (epub_document->documentdir);
1842 		epub_document->documentdir = NULL;
1843 	}
1844 	G_OBJECT_CLASS (epub_document_parent_class)->finalize (object);
1845 }
1846 
1847 
1848 static void
epub_document_class_init(EpubDocumentClass * klass)1849 epub_document_class_init (EpubDocumentClass *klass)
1850 {
1851 	GObjectClass    *gobject_class = G_OBJECT_CLASS (klass);
1852 	EvDocumentClass *ev_document_class = EV_DOCUMENT_CLASS (klass);
1853 
1854 	gobject_class->finalize = epub_document_finalize;
1855 	ev_document_class->load = epub_document_load;
1856 	ev_document_class->save = epub_document_save;
1857 	ev_document_class->get_n_pages = epub_document_get_n_pages;
1858 	ev_document_class->get_info = epub_document_get_info;
1859 	ev_document_class->get_page = epub_document_get_page;
1860 	ev_document_class->toggle_night_mode = epub_document_toggle_night_mode;
1861     ev_document_class->check_add_night_sheet = epub_document_check_add_night_sheet;
1862 }
1863