1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifdef HAVE_CONFIG_H
57 # include "config.h"
58 #endif
59 #if NEED_GNUG_PRAGMAS
60 # pragma implementation
61 #endif
62 
63 #include "DjVuDocument.h"
64 #include "DjVmDoc.h"
65 #include "DjVmDir0.h"
66 #include "DjVmNav.h"
67 #include "DjVuNavDir.h"
68 #include "DjVuImage.h"
69 #include "DjVuFileCache.h"
70 #include "IFFByteStream.h"
71 #include "GOS.h"
72 #include "DataPool.h"
73 #include "IW44Image.h"
74 #include "GRect.h"
75 
76 #include "debug.h"
77 
78 
79 #ifdef HAVE_NAMESPACES
80 namespace DJVU {
81 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
82 }
83 #endif
84 #endif
85 
86 
87 static const char octets[4]={0x41,0x54,0x26,0x54};
88 const float	DjVuDocument::thumb_gamma=(float)2.20;
89 
90 void (* DjVuDocument::djvu_import_codec)(
91   GP<DataPool> &pool, const GURL &url, bool &needs_compression,
92   bool &needs_rename )=0;
93 
94 void (* DjVuDocument::djvu_compress_codec)(
95   GP<ByteStream> &doc,const GURL &where,bool bundled)=0;
96 
97 void
set_import_codec(void (* codec)(GP<DataPool> & pool,const GURL & url,bool & needs_compression,bool & needs_rename))98 DjVuDocument::set_import_codec(
99   void (*codec)(
100     GP<DataPool> &pool, const GURL &url, bool &needs_compression, bool &needs_rename ))
101 {
102   djvu_import_codec=codec;
103 }
104 
105 void
set_compress_codec(void (* codec)(GP<ByteStream> & doc,const GURL & where,bool bundled))106 DjVuDocument::set_compress_codec(
107   void (* codec)(
108     GP<ByteStream> &doc,const GURL &where,bool bundled))
109 {
110   djvu_compress_codec=codec;
111 }
112 
DjVuDocument(void)113 DjVuDocument::DjVuDocument(void)
114   : doc_type(UNKNOWN_TYPE),
115     needs_compression_flag(false),
116     can_compress_flag(false),
117     needs_rename_flag(false),
118     has_url_names(false),
119     recover_errors(ABORT),
120     verbose_eof(false),
121     init_started(false),
122     cache(0)
123 {
124 }
125 
126 GP<DjVuDocument>
create(GP<DataPool> pool,GP<DjVuPort> xport,DjVuFileCache * const xcache)127 DjVuDocument::create(
128   GP<DataPool> pool, GP<DjVuPort> xport, DjVuFileCache * const xcache)
129 {
130   DjVuDocument *doc=new DjVuDocument;
131   GP<DjVuDocument> retval=doc;
132   doc->init_data_pool=pool;
133   doc->start_init(GURL(),xport,xcache);
134   return retval;
135 }
136 
137 GP<DjVuDocument>
create(const GP<ByteStream> & bs,GP<DjVuPort> xport,DjVuFileCache * const xcache)138 DjVuDocument::create(
139   const GP<ByteStream> &bs, GP<DjVuPort> xport, DjVuFileCache * const xcache)
140 {
141   return create(DataPool::create(bs),xport,xcache);
142 }
143 
144 GP<DjVuDocument>
create_wait(const GURL & url,GP<DjVuPort> xport,DjVuFileCache * const xcache)145 DjVuDocument::create_wait(
146   const GURL &url, GP<DjVuPort> xport, DjVuFileCache * const xcache)
147 {
148   GP<DjVuDocument> retval=create(url,xport,xcache);
149   retval->wait_for_complete_init();
150   return retval;
151 }
152 
153 void
start_init(const GURL & url,GP<DjVuPort> xport,DjVuFileCache * xcache)154 DjVuDocument::start_init(
155   const GURL & url, GP<DjVuPort> xport, DjVuFileCache * xcache)
156 {
157    DEBUG_MSG("DjVuDocument::start_init(): initializing class...\n");
158    DEBUG_MAKE_INDENT(3);
159    if (init_started)
160       G_THROW( ERR_MSG("DjVuDocument.2nd_init") );
161    if (!get_count())
162       G_THROW( ERR_MSG("DjVuDocument.not_secure") );
163    if(url.is_empty())
164    {
165      if (!init_data_pool)
166        G_THROW( ERR_MSG("DjVuDocument.empty_url") );
167      if(init_url.is_empty())
168      {
169        init_url=invent_url("document.djvu");
170      }
171    }else
172    {
173      init_url=url;
174    }
175 
176       // Initialize
177    cache=xcache;
178    doc_type=UNKNOWN_TYPE;
179    DataPool::close_all();
180    DjVuPortcaster * pcaster=get_portcaster();
181    if (!xport)
182      xport=simple_port=new DjVuSimplePort();
183    pcaster->add_route(this, xport);
184    pcaster->add_route(this, this);
185 
186    if(!url.is_empty())
187    {
188      init_data_pool=pcaster->request_data(this, init_url);
189      if(init_data_pool)
190      {
191        if(!init_url.is_empty() && init_url.is_local_file_url() && djvu_import_codec)
192        {
193          djvu_import_codec(init_data_pool,init_url,needs_compression_flag,needs_rename_flag);
194        }
195        if(needs_rename_flag)
196          can_compress_flag=true;
197      }
198      if (!init_data_pool)
199      {
200        G_THROW( ERR_MSG("DjVuDocument.fail_URL") "\t"+init_url.get_string());
201      }
202    }
203       // Now we say it is ready
204    init_started=true;
205 
206    init_thread_flags=STARTED;
207    init_life_saver=this;
208    init_thr.create(static_init_thread, this);
209 }
210 
~DjVuDocument(void)211 DjVuDocument::~DjVuDocument(void)
212 {
213       // No more messages, please. We're being destroyed.
214    get_portcaster()->del_port(this);
215 
216       // We want to stop any DjVuFile which has been created by us
217       // and is still being decoded. We have to stop them manually because
218       // they keep the "life saver" in the decoding thread and won't stop
219       // when we clear the last reference to them
220    {
221       GCriticalSectionLock lock(&ufiles_lock);
222       for(GPosition pos=ufiles_list;pos;++pos)
223       {
224           GP<DjVuFile> file=ufiles_list[pos]->file;
225           file->stop_decode(false);
226           file->stop(false);	// Disable any access to data
227       }
228       ufiles_list.empty();
229    }
230 
231    GPList<DjVuPort> ports=get_portcaster()->prefix_to_ports(get_int_prefix());
232    for(GPosition pos=ports;pos;++pos)
233    {
234      GP<DjVuPort> port=ports[pos];
235      if (port->inherits("DjVuFile"))
236      {
237        DjVuFile * file=(DjVuFile *) (DjVuPort *) port;
238        file->stop_decode(false);
239        file->stop(false);	// Disable any access to data
240      }
241    }
242    DataPool::close_all();
243 }
244 
245 void
stop_init(void)246 DjVuDocument::stop_init(void)
247 {
248    DEBUG_MSG("DjVuDocument::stop_init(): making sure that the init thread dies.\n");
249    DEBUG_MAKE_INDENT(3);
250 
251    GMonitorLock lock(&init_thread_flags);
252    while((init_thread_flags & STARTED) &&
253 	 !(init_thread_flags & FINISHED))
254    {
255       if (init_data_pool) init_data_pool->stop(true);	// blocking operation
256 
257       if (ndir_file) ndir_file->stop(false);
258 
259       {
260 	 GCriticalSectionLock lock(&ufiles_lock);
261 	 for(GPosition pos=ufiles_list;pos;++pos)
262 	    ufiles_list[pos]->file->stop(false);	// Disable any access to data
263 	 ufiles_list.empty();
264       }
265 
266       init_thread_flags.wait(50);
267    }
268 }
269 
270 void
check() const271 DjVuDocument::check() const
272 {
273   if (!init_started)
274     G_THROW( ERR_MSG("DjVuDocument.not_init") );
275 }
276 
277 void
static_init_thread(void * cl_data)278 DjVuDocument::static_init_thread(void * cl_data)
279 {
280   DjVuDocument * th=(DjVuDocument *) cl_data;
281   GP<DjVuDocument> life_saver=th;
282   th->init_life_saver=0;
283   G_TRY {
284     th->init_thread();
285   } G_CATCH(exc) {
286     G_TRY {
287       int changed = DjVuDocument::DOC_INIT_FAILED;
288       th->flags |= changed;
289       get_portcaster()->notify_doc_flags_changed(th, changed, 0);
290     } G_CATCH_ALL {
291     } G_ENDCATCH;
292     G_TRY {
293       th->check_unnamed_files();
294       if (!exc.cmp_cause(ByteStream::EndOfFile) && th->verbose_eof)
295         get_portcaster()->notify_error(th, ERR_MSG("DjVuDocument.init_eof"));
296       else if (!exc.cmp_cause(DataPool::Stop))
297         get_portcaster()->notify_status(th, ERR_MSG("DjVuDocument.stopped"));
298       else
299         get_portcaster()->notify_error(th, exc.get_cause());
300     } G_CATCH_ALL {
301     } G_ENDCATCH;
302     th->init_thread_flags |= FINISHED;
303   } G_ENDCATCH;
304 }
305 
306 void
init_thread(void)307 DjVuDocument::init_thread(void)
308       // This function is run in a separate thread.
309       // The goal is to detect the document type (BUNDLED, OLD_INDEXED, etc.)
310       // and decode navigation directory.
311 {
312    DEBUG_MSG("DjVuDocument::init_thread(): guessing what we're dealing with\n");
313    DEBUG_MAKE_INDENT(3);
314 
315    DjVuPortcaster * pcaster=get_portcaster();
316 
317    GP<ByteStream> stream=init_data_pool->get_stream();
318 
319    GP<IFFByteStream> giff=IFFByteStream::create(stream);
320    IFFByteStream &iff=*giff;
321    GUTF8String chkid;
322    int size=iff.get_chunk(chkid);
323    if (!size)
324      G_THROW( ByteStream::EndOfFile );
325    if (size < 0)
326      G_THROW( ERR_MSG("DjVuDocument.no_file") );
327    if (size<8)
328      G_THROW( ERR_MSG("DjVuDocument.not_DjVu") );
329    if (chkid=="FORM:DJVM")
330    {
331      DEBUG_MSG("Got DJVM document here\n");
332      DEBUG_MAKE_INDENT(3);
333 
334      size=iff.get_chunk(chkid);
335      if (chkid=="DIRM")
336        {
337 	 djvm_dir=DjVmDir::create();
338 	 djvm_dir->decode(iff.get_bytestream());
339 	 iff.close_chunk();
340 	 if (djvm_dir->is_bundled())
341            {
342              DEBUG_MSG("Got BUNDLED file.\n");
343              doc_type=BUNDLED;
344            }
345          else
346            {
347              DEBUG_MSG("Got INDIRECT file.\n");
348              doc_type=INDIRECT;
349            }
350 	 flags|=DOC_TYPE_KNOWN | DOC_DIR_KNOWN;
351 	 pcaster->notify_doc_flags_changed(this,
352                                            DOC_TYPE_KNOWN | DOC_DIR_KNOWN, 0);
353 	 check_unnamed_files();
354 
355          /* Check for NAVM */
356          size=iff.get_chunk(chkid);
357          if (size && chkid=="NAVM")
358            {
359              djvm_nav=DjVmNav::create();
360              djvm_nav->decode(iff.get_bytestream());
361              iff.close_chunk();
362            }
363        }
364      else if (chkid=="DIR0")
365        {
366 	 DEBUG_MSG("Got OLD_BUNDLED file.\n");
367 	 doc_type=OLD_BUNDLED;
368 	 flags|=DOC_TYPE_KNOWN;
369 	 pcaster->notify_doc_flags_changed(this, DOC_TYPE_KNOWN, 0);
370 	 check_unnamed_files();
371        }
372      else
373        G_THROW( ERR_MSG("DjVuDocument.bad_format") );
374 
375      if (doc_type==OLD_BUNDLED)
376        {
377          // Read the DjVmDir0 directory. We are unable to tell what
378          // files are pages and what are included at this point.
379          // We only know that the first file with DJVU (BM44 or PM44)
380          // form *is* the first page. The rest will become known
381          // after we decode DjVuNavDir
382 	 djvm_dir0=DjVmDir0::create();
383 	 djvm_dir0->decode(*iff.get_bytestream());
384 	 iff.close_chunk();
385          // Get offset to the first DJVU, PM44 or BM44 chunk
386 	 int first_page_offset=0;
387 	 while(!first_page_offset)
388            {
389              int offset;
390              size=iff.get_chunk(chkid, &offset);
391              if (size==0) G_THROW( ERR_MSG("DjVuDocument.no_page") );
392              if (chkid=="FORM:DJVU" ||
393                  chkid=="FORM:PM44" || chkid=="FORM:BM44")
394                {
395                  DEBUG_MSG("Got 1st page offset=" << offset << "\n");
396                  first_page_offset=offset;
397                }
398              iff.close_chunk();
399            }
400 
401          // Now get the name of this file
402 	 int file_num;
403 	 for(file_num=0;file_num<djvm_dir0->get_files_num();file_num++)
404            {
405              DjVmDir0::FileRec & file=*djvm_dir0->get_file(file_num);
406              if (file.offset==first_page_offset)
407                {
408                  first_page_name=file.name;
409                  break;
410                }
411            }
412 	 if (!first_page_name.length())
413            G_THROW( ERR_MSG("DjVuDocument.no_page") );
414 	 flags|=DOC_DIR_KNOWN;
415 	 pcaster->notify_doc_flags_changed(this, DOC_DIR_KNOWN, 0);
416 	 check_unnamed_files();
417        }
418    }
419    else // chkid!="FORM:DJVM"
420      {
421        // DJVU format
422        DEBUG_MSG("Got DJVU OLD_INDEXED or SINGLE_PAGE document here.\n");
423        doc_type=SINGLE_PAGE;
424        flags |= DOC_TYPE_KNOWN;
425        pcaster->notify_doc_flags_changed(this, DOC_TYPE_KNOWN, 0);
426        check_unnamed_files();
427      }
428    if (doc_type==OLD_BUNDLED || doc_type==SINGLE_PAGE)
429      {
430        DEBUG_MSG("Searching for NDIR chunks...\n");
431        ndir_file=get_djvu_file(-1);
432        if (ndir_file) ndir=ndir_file->decode_ndir();
433        ndir_file=0;	// Otherwise ~DjVuDocument() will stop (=kill) it
434        if (!ndir)
435          {
436            // Seems to be 1-page old-style document. Create dummy NDIR
437            if (doc_type==OLD_BUNDLED)
438              {
439                ndir=DjVuNavDir::create(GURL::UTF8("directory",init_url));
440                ndir->insert_page(-1, first_page_name);
441              }
442            else
443              {
444                ndir=DjVuNavDir::create(GURL::UTF8("directory",init_url.base()));
445                ndir->insert_page(-1, init_url.fname());
446              }
447          }
448        else
449          {
450            if (doc_type==SINGLE_PAGE)
451              doc_type=OLD_INDEXED;
452          }
453        flags|=DOC_NDIR_KNOWN;
454        pcaster->notify_doc_flags_changed(this, DOC_NDIR_KNOWN, 0);
455        check_unnamed_files();
456      }
457 
458    flags |= DOC_INIT_OK;
459    pcaster->notify_doc_flags_changed(this, DOC_INIT_OK, 0);
460    check_unnamed_files();
461    init_thread_flags|=FINISHED;
462    DEBUG_MSG("DOCUMENT IS FULLY INITIALIZED now: doc_type='" <<
463 	     (doc_type==BUNDLED ? "BUNDLED" :
464 	      doc_type==OLD_BUNDLED ? "OLD_BUNDLED" :
465 	      doc_type==INDIRECT ? "INDIRECT" :
466 	      doc_type==OLD_INDEXED ? "OLD_INDEXED" :
467 	      doc_type==SINGLE_PAGE ? "SINGLE_PAGE" :
468 	      "UNKNOWN") << "'\n");
469 }
470 
471 bool
wait_for_complete_init(void)472 DjVuDocument::wait_for_complete_init(void)
473 {
474   flags.enter();
475   while(!(flags & DOC_INIT_FAILED) &&
476         !(flags & DOC_INIT_OK)) flags.wait();
477   flags.leave();
478   init_thread_flags.enter();
479   while (!(init_thread_flags & FINISHED))
480     init_thread_flags.wait();
481   init_thread_flags.leave();
482   return (flags & (DOC_INIT_OK | DOC_INIT_FAILED))!=0;
483 }
484 
485 int
wait_get_pages_num(void) const486 DjVuDocument::wait_get_pages_num(void) const
487 {
488   GSafeFlags &f=const_cast<GSafeFlags &>(flags);
489   f.enter();
490   while(!(f & DOC_TYPE_KNOWN) &&
491         !(f & DOC_INIT_FAILED) &&
492         !(f & DOC_INIT_OK)) f.wait();
493   f.leave();
494   return get_pages_num();
495 }
496 
497 GUTF8String
get_int_prefix(void) const498 DjVuDocument::get_int_prefix(void) const
499 {
500       // These NAMEs are used to enable DjVuFile sharing inside the same
501       // DjVuDocument using DjVuPortcaster. Since URLs are unique to the
502       // document, other DjVuDocuments cannot retrieve files until they're
503       // assigned some permanent name. After '?' there should be the real
504       // file's URL. Please note, that output of this function is used only
505       // as name for DjVuPortcaster. Not as a URL.
506    GUTF8String retval;
507    return retval.format("document_%p%d?", this, hash(init_url));
508 }
509 
510 void
set_file_aliases(const DjVuFile * file)511 DjVuDocument::set_file_aliases(const DjVuFile * file)
512 {
513    DEBUG_MSG("DjVuDocument::set_file_aliases(): setting global aliases for file '"
514 	     << file->get_url() << "'\n");
515    DEBUG_MAKE_INDENT(3);
516 
517    DjVuPortcaster * pcaster=DjVuPort::get_portcaster();
518 
519    GMonitorLock lock(&((DjVuFile *) file)->get_safe_flags());
520    pcaster->clear_aliases(file);
521    if (file->is_decode_ok() && cache)
522    {
523 	 // If file is successfully decoded and caching is enabled,
524 	 // assign a global alias to this file, so that any other
525 	 // DjVuDocument will be able to use it.
526 
527       pcaster->add_alias(file, file->get_url().get_string());
528       if (flags & (DOC_NDIR_KNOWN | DOC_DIR_KNOWN))
529       {
530 	 int page_num=url_to_page(file->get_url());
531 	 if (page_num>=0)
532 	 {
533 	    if (page_num==0) pcaster->add_alias(file, init_url.get_string()+"#-1");
534 	    pcaster->add_alias(file, init_url.get_string()+"#"+GUTF8String(page_num));
535 	 }
536       }
537 	 // The following line MUST stay here. For OLD_INDEXED documents
538 	 // a page may finish decoding before DIR or NDIR becomes known
539 	 // (multithreading, remember), so the code above would not execute
540       pcaster->add_alias(file, file->get_url().get_string()+"#-1");
541    } else pcaster->add_alias(file, get_int_prefix()+file->get_url());
542 }
543 
544 void
check_unnamed_files(void)545 DjVuDocument::check_unnamed_files(void)
546 {
547   DEBUG_MSG("DjVuDocument::check_unnamed_files(): Seeing if we can fix some...\n");
548   DEBUG_MAKE_INDENT(3);
549 
550   if (flags & DOC_INIT_FAILED)
551   {
552     // Init failed. All unnamed files should be terminated
553     GCriticalSectionLock lock(&ufiles_lock);
554     for(GPosition pos=ufiles_list;pos;++pos)
555     {
556       GP<DjVuFile> file=ufiles_list[pos]->file;
557       file->stop_decode(true);
558       file->stop(false);	// Disable any access to data
559     }
560     ufiles_list.empty();
561     return;
562   }
563 
564   if ((flags & DOC_TYPE_KNOWN)==0)
565     return;
566 
567   // See the list of unnamed files (created when there was insufficient
568   // information about DjVuDocument structure) and try to fix those,
569   // which can be fixed at this time
570   while(true)
571   {
572     DjVuPortcaster * pcaster=get_portcaster();
573 
574     GP<UnnamedFile> ufile;
575     GURL new_url;
576     GPosition pos ;
577 	   GCriticalSectionLock lock(&ufiles_lock);
578      for(pos=ufiles_list;pos;)
579      {
580 	G_TRY
581         {
582           GP<UnnamedFile> f=ufiles_list[pos];
583           if (f->id_type==UnnamedFile::ID)
584             new_url=id_to_url(f->id);
585           else
586             new_url=page_to_url(f->page_num);
587           if (!new_url.is_empty())
588           {
589             ufile=f;
590             // Don't take it off the list. We want to be
591             // able to stop the init from ~DjVuDocument();
592             //
593             // ufiles_list.del(pos);
594             break;
595           } else if (is_init_complete())
596           {
597             // No empty URLs are allowed at this point.
598             // We now know all information about the document
599             // and can determine if a page is inside it or not
600             f->data_pool->set_eof();
601             GUTF8String msg;
602             if (f->id_type==UnnamedFile::ID)
603               msg= ERR_MSG("DjVuDocument.miss_page_name") "\t"+f->id;
604             else
605               msg= ERR_MSG("DjVuDocument.miss_page_num") "\t"+GUTF8String(f->page_num);
606             G_THROW(msg);
607           }
608           ++pos;
609         }
610         G_CATCH(exc)
611         {
612           pcaster->notify_error(this, exc.get_cause());
613           GP<DataPool> pool=ufiles_list[pos]->data_pool;
614           if (pool)
615             pool->stop();
616           GPosition this_pos=pos;
617           ++pos;
618           ufiles_list.del(this_pos);
619         }
620         G_ENDCATCH;
621      }
622 
623      if (ufile && !new_url.is_empty())
624        {
625          DEBUG_MSG("Fixing file: '" << ufile->url << "'=>'" << new_url << "'\n");
626          // Now, once we know its real URL we can request a real DataPool and
627          // can connect the DataPool owned by DjVuFile to that real one
628          // Note, that now request_data() will not play fool because
629          // we have enough information
630 
631          G_TRY
632            {
633              if (ufile->data_pool)
634                {
635                  GP<DataPool> new_pool=pcaster->request_data(ufile->file, new_url);
636                  if(!new_pool)
637                    G_THROW( ERR_MSG("DjVuDocument.fail_URL") "\t"+new_url.get_string());
638                  ufile->data_pool->connect(new_pool);
639                }
640              ufile->file->set_name(new_url.fname());
641              ufile->file->move(new_url.base());
642              set_file_aliases(ufile->file);
643            }
644          G_CATCH(exc)
645            {
646              pcaster->notify_error(this, exc.get_cause());
647            }
648          G_ENDCATCH;
649        }
650      else
651        break;
652 
653      // Remove the 'ufile' from the list
654      for(pos=ufiles_list;pos;++pos)
655        if (ufiles_list[pos]==ufile)
656          {
657            ufiles_list.del(pos);
658            break;
659          }
660   } // while(1)
661 }
662 
663 int
get_pages_num(void) const664 DjVuDocument::get_pages_num(void) const
665 {
666   check();
667   if (flags & DOC_TYPE_KNOWN)
668     {
669       if (doc_type==BUNDLED || doc_type==INDIRECT)
670 	return djvm_dir->get_pages_num();
671       else if (flags & DOC_NDIR_KNOWN)
672 	return ndir->get_pages_num();
673     }
674   return 1;
675 }
676 
677 GURL
page_to_url(int page_num) const678 DjVuDocument::page_to_url(int page_num) const
679 {
680    check();
681    DEBUG_MSG("DjVuDocument::page_to_url(): page_num=" << page_num << "\n");
682    DEBUG_MAKE_INDENT(3);
683 
684    GURL url;
685    if (flags & DOC_TYPE_KNOWN)
686       switch(doc_type)
687       {
688 	 case SINGLE_PAGE:
689          {
690            if (page_num<1)
691              url=init_url;
692            else
693              G_THROW( ERR_MSG("DjVuDocument.big_num") );
694            break;
695          }
696 	 case OLD_INDEXED:
697 	 {
698 	    if (page_num<0)
699               url=init_url;
700 	    else if (flags & DOC_NDIR_KNOWN)
701               url=ndir->page_to_url(page_num);
702 	    break;
703 	 }
704 	 case OLD_BUNDLED:
705 	 {
706 	    if (page_num<0)
707               page_num=0;
708 	    if (page_num==0 && (flags & DOC_DIR_KNOWN))
709               url=GURL::UTF8(first_page_name,init_url);
710 	    else if (flags & DOC_NDIR_KNOWN)
711               url=ndir->page_to_url(page_num);
712 	    break;
713 	 }
714 	 case BUNDLED:
715 	 {
716 	    if (page_num<0)
717               page_num=0;
718 	    if (flags & DOC_DIR_KNOWN)
719 	    {
720 	      GP<DjVmDir::File> file=djvm_dir->page_to_file(page_num);
721 	      if (!file)
722                 G_THROW( ERR_MSG("DjVuDocument.big_num") );
723 	      url=GURL::UTF8(file->get_load_name(),init_url);
724 	    }
725 	    break;
726 	 }
727 	 case INDIRECT:
728 	 {
729 	    if (page_num<0) page_num=0;
730 	    if (flags & DOC_DIR_KNOWN)
731 	    {
732 	       GP<DjVmDir::File> file=djvm_dir->page_to_file(page_num);
733 	       if (!file)
734                  G_THROW( ERR_MSG("DjVuDocument.big_num") );
735 	       url=GURL::UTF8(file->get_load_name(),init_url.base());
736 	    }
737 	    break;
738 	 }
739 	 default:
740 	    G_THROW( ERR_MSG("DjVuDocument.unk_type") );
741       }
742    return url;
743 }
744 
745 int
url_to_page(const GURL & url) const746 DjVuDocument::url_to_page(const GURL & url) const
747 {
748    check();
749    DEBUG_MSG("DjVuDocument::url_to_page(): url='" << url << "'\n");
750    DEBUG_MAKE_INDENT(3);
751 
752    int page_num=-1;
753    if (flags & DOC_TYPE_KNOWN)
754       switch(doc_type)
755       {
756 	 case SINGLE_PAGE:
757 	 case OLD_BUNDLED:
758 	 case OLD_INDEXED:
759 	 {
760 	    if (flags & DOC_NDIR_KNOWN) page_num=ndir->url_to_page(url);
761 	    break;
762 	 }
763 	 case BUNDLED:
764 	 {
765 	    if (flags & DOC_DIR_KNOWN)
766 	    {
767 	       GP<DjVmDir::File> file;
768 	       if (url.base()==init_url)
769                  file=djvm_dir->id_to_file(url.fname());
770 	       if (file)
771                  page_num=file->get_page_num();
772 	    }
773 	    break;
774 	 }
775 	 case INDIRECT:
776 	 {
777 	    if (flags & DOC_DIR_KNOWN)
778 	    {
779 	       GP<DjVmDir::File> file;
780 	       if (url.base()==init_url.base())
781                  file=djvm_dir->id_to_file(url.fname());
782 	       if (file)
783                  page_num=file->get_page_num();
784 	    }
785 	    break;
786 	 }
787 	 default:
788 	    G_THROW( ERR_MSG("DjVuDocument.unk_type") );
789       }
790    return page_num;
791 }
792 
793 GURL
id_to_url(const GUTF8String & id) const794 DjVuDocument::id_to_url(const GUTF8String & id) const
795 {
796    check();
797    DEBUG_MSG("DjVuDocument::id_to_url(): translating ID='" << id << "' to URL\n");
798    DEBUG_MAKE_INDENT(3);
799 
800    if (flags & DOC_TYPE_KNOWN)
801       switch(doc_type)
802       {
803 	 case BUNDLED:
804 	    if (flags & DOC_DIR_KNOWN)
805 	    {
806 	      GP<DjVmDir::File> file=djvm_dir->id_to_file(id);
807 	      if (!file)
808                 file=djvm_dir->name_to_file(id);
809 	      if (!file)
810                 file=djvm_dir->title_to_file(id);
811 	      if (file)
812 	        return GURL::UTF8(file->get_load_name(),init_url);
813 	    }
814 	    break;
815 	 case INDIRECT:
816 	    if (flags & DOC_DIR_KNOWN)
817 	    {
818 	       GP<DjVmDir::File> file=djvm_dir->id_to_file(id);
819 	       if (!file)
820                  file=djvm_dir->name_to_file(id);
821 	      if (!file)
822                 file=djvm_dir->title_to_file(id);
823 	       if (file)
824 	         return GURL::UTF8(file->get_load_name(),init_url.base());
825 	    }
826 	    break;
827 	 case OLD_BUNDLED:
828 	    if (flags & DOC_DIR_KNOWN)
829 	    {
830 	       GP<DjVmDir0::FileRec> frec=djvm_dir0->get_file(id);
831 	       if (frec)
832                  return GURL::UTF8(id,init_url);
833 	    }
834 	    break;
835 	 case OLD_INDEXED:
836 	 case SINGLE_PAGE:
837 	    {
838 	       GURL url = GURL::UTF8(id,init_url.base());
839 	       if (url.fname() == "-")
840 	          G_THROW("Illegal include chunk (corrupted file?)");
841 	       return url;
842 	    }
843 	    break;
844       }
845    return GURL();
846 }
847 
848 GURL
id_to_url(const DjVuPort * source,const GUTF8String & id)849 DjVuDocument::id_to_url(const DjVuPort * source, const GUTF8String &id)
850 {
851    return id_to_url(id);
852 }
853 
854 GP<DjVuFile>
url_to_file(const GURL & url,bool dont_create) const855 DjVuDocument::url_to_file(const GURL & url, bool dont_create) const
856       // This function is private and is called from two places:
857       // id_to_file() and get_djvu_file() ONLY when the structure is known
858 {
859    check();
860    DEBUG_MSG("DjVuDocument::url_to_file(): url='" << url << "'\n");
861    DEBUG_MAKE_INDENT(3);
862 
863       // Try DjVuPortcaster to find existing files.
864    DjVuPortcaster * pcaster=DjVuPort::get_portcaster();
865    GP<DjVuPort> port;
866 
867    if (cache)
868    {
869 	 // First - fully decoded files
870       port=pcaster->alias_to_port(url.get_string());
871       if (port && port->inherits("DjVuFile"))
872       {
873 	 DEBUG_MSG("found fully decoded file using DjVuPortcaster\n");
874 	 return (DjVuFile *) (DjVuPort *) port;
875       }
876    }
877 
878       // Second - internal files
879    port=pcaster->alias_to_port(get_int_prefix()+url);
880    if (port && port->inherits("DjVuFile"))
881    {
882       DEBUG_MSG("found internal file using DjVuPortcaster\n");
883       return (DjVuFile *) (DjVuPort *) port;
884    }
885 
886    GP<DjVuFile> file;
887 
888    if (!dont_create)
889    {
890       DEBUG_MSG("creating a new file\n");
891       file=DjVuFile::create(url,const_cast<DjVuDocument *>(this),recover_errors,verbose_eof);
892       const_cast<DjVuDocument *>(this)->set_file_aliases(file);
893    }
894 
895    return file;
896 }
897 
898 GP<DjVuFile>
get_djvu_file(int page_num,bool dont_create) const899 DjVuDocument::get_djvu_file(int page_num, bool dont_create) const
900 {
901    check();
902    DEBUG_MSG("DjVuDocument::get_djvu_file(): request for page " << page_num << "\n");
903    DEBUG_MAKE_INDENT(3);
904 
905    DjVuPortcaster * pcaster=DjVuPort::get_portcaster();
906 
907    GURL url;
908    {
909 	 // I'm locking the flags because depending on what page_to_url()
910 	 // returns me, I'll be creating DjVuFile in different ways.
911 	 // And I don't want the situation to change between the moment I call
912 	 // id_to_url() and I actually create DjVuFile
913       GMonitorLock lock(&(const_cast<DjVuDocument *>(this)->flags));
914       url=page_to_url(page_num);
915       if (url.is_empty())
916       {
917 	    // If init is complete and url is empty, we know for sure, that
918 	    // smth is wrong with the page_num. So we can return ZERO.
919 	    // Otherwise we create a temporary file and wait for init to finish
920 	 if (is_init_complete()) return 0;
921 
922 	 DEBUG_MSG("Structure is not known => check <doc_url>#<page_num> alias...\n");
923 	 GP<DjVuPort> port;
924 	 if (cache)
925 	    port=pcaster->alias_to_port(init_url.get_string()+"#"+GUTF8String(page_num));
926 	 if (!port || !port->inherits("DjVuFile"))
927 	 {
928 	    DEBUG_MSG("failed => invent dummy URL and proceed\n");
929 
930 	       // Invent some dummy temporary URL. I don't care what it will
931 	       // be. I'll remember the page_num and will generate the correct URL
932 	       // after I learn what the document is
933             GUTF8String name("page");
934             name+=GUTF8String(page_num);
935             name+=".djvu";
936             url=invent_url(name);
937 
938             GCriticalSectionLock(&(const_cast<DjVuDocument *>(this)->ufiles_lock));
939 	    for(GPosition pos=ufiles_list;pos;++pos)
940 	    {
941 	       GP<UnnamedFile> f=ufiles_list[pos];
942 	       if (f->url==url) return f->file;
943 	    }
944 	    GP<UnnamedFile> ufile=new UnnamedFile(UnnamedFile::PAGE_NUM, 0,
945 						  page_num, url, 0);
946 
947 	       // We're adding the record to the list before creating the DjVuFile
948 	       // because DjVuFile::init() will call request_data(), and the
949 	       // latter should be able to find the record.
950 	       //
951 	       // We also want to keep ufiles_lock to make sure that when
952 	       // request_data() is called, the record is still there
953 	    const_cast<DjVuDocument *>(this)->ufiles_list.append(ufile);
954 
955 	    GP<DjVuFile> file=
956               DjVuFile::create(url,const_cast<DjVuDocument *>(this),recover_errors,verbose_eof);
957 	    ufile->file=file;
958 	    return file;
959 	 } else url=((DjVuFile *) (DjVuPort *) port)->get_url();
960       }
961    }
962 
963    GP<DjVuFile> file=url_to_file(url, dont_create);
964    if (file)
965      pcaster->add_route(file, const_cast<DjVuDocument *>(this));
966    return file;
967 }
968 
969 GURL
invent_url(const GUTF8String & name) const970 DjVuDocument::invent_url(const GUTF8String &name) const
971 {
972    GUTF8String buffer;
973    buffer.format("djvufileurl://%p/%s", this, (const char *)name);
974    return GURL::UTF8(buffer);
975 }
976 
977 GP<DjVuFile>
get_djvu_file(const GUTF8String & id,bool dont_create)978 DjVuDocument::get_djvu_file(const GUTF8String& id, bool dont_create)
979 {
980   check();
981   DEBUG_MSG("DjVuDocument::get_djvu_file(): ID='" << id << "'\n");
982   DEBUG_MAKE_INDENT(3);
983   if (!id.length())
984     return get_djvu_file(-1);
985 
986 // Integers are not supported, only ID's
987 //  if (id.is_int())
988 //     return get_djvu_file(id.toInt(),dont_create);
989 
990   GURL url;
991   // I'm locking the flags because depending on what id_to_url()
992   // returns me, I'll be creating DjVuFile in different ways.
993   // And I don't want the situation to change between the moment I call
994   // id_to_url() and I actually create DjVuFile
995   {
996     GMonitorLock lock(&flags);
997     url=id_to_url(id);
998     if(url.is_empty() && !id.is_int())
999     {
1000       // If init is complete, we know for sure, that there is no such
1001       // file with ID 'id' in the document. Otherwise we have to
1002       // create a temporary file and wait for the init to finish
1003       if (is_init_complete())
1004         return 0;
1005       // Invent some dummy temporary URL. I don't care what it will
1006       // be. I'll remember the ID and will generate the correct URL
1007       // after I learn what the document is
1008       url=invent_url(id);
1009       DEBUG_MSG("Invented url='" << url << "'\n");
1010 
1011       GCriticalSectionLock lock(&ufiles_lock);
1012       for(GPosition pos=ufiles_list;pos;++pos)
1013       {
1014         GP<UnnamedFile> f=ufiles_list[pos];
1015         if (f->url==url)
1016           return f->file;
1017       }
1018       GP<UnnamedFile> ufile=new UnnamedFile(UnnamedFile::ID, id, 0, url, 0);
1019 
1020       // We're adding the record to the list before creating the DjVuFile
1021       // because DjVuFile::init() will call request_data(), and the
1022       // latter should be able to find the record.
1023       //
1024       // We also want to keep ufiles_lock to make sure that when
1025       // request_data() is called, the record is still there
1026       ufiles_list.append(ufile);
1027 
1028       GP<DjVuFile> file=DjVuFile::create(url,this,recover_errors,verbose_eof);
1029       ufile->file=file;
1030       return file;
1031     }
1032   }
1033 
1034   return get_djvu_file(url,dont_create);
1035 }
1036 
1037 GP<DjVuFile>
get_djvu_file(const GURL & url,bool dont_create)1038 DjVuDocument::get_djvu_file(const GURL& url, bool dont_create)
1039 {
1040    check();
1041    DEBUG_MSG("DjVuDocument::get_djvu_file(): URL='" << url << "'\n");
1042    DEBUG_MAKE_INDENT(3);
1043 
1044    if (url.is_empty())
1045      return 0;
1046 
1047    const GP<DjVuFile> file(url_to_file(url, dont_create));
1048 
1049    if (file)
1050      get_portcaster()->add_route(file, this);
1051 
1052    return file;
1053 }
1054 
1055 GP<DjVuImage>
get_page(int page_num,bool sync,DjVuPort * port) const1056 DjVuDocument::get_page(int page_num, bool sync, DjVuPort * port) const
1057 {
1058    check();
1059    DEBUG_MSG("DjVuDocument::get_page(): request for page " << page_num << "\n");
1060    DEBUG_MAKE_INDENT(3);
1061 
1062    GP<DjVuImage> dimg;
1063    const GP<DjVuFile> file(get_djvu_file(page_num));
1064    if (file)
1065    {
1066      dimg=DjVuImage::create(file);
1067      if (port)
1068        DjVuPort::get_portcaster()->add_route(dimg, port);
1069 
1070      file->resume_decode();
1071      if (dimg && sync)
1072        dimg->wait_for_complete_decode();
1073    }
1074    return dimg;
1075 }
1076 
1077 GP<DjVuImage>
get_page(const GUTF8String & id,bool sync,DjVuPort * port)1078 DjVuDocument::get_page(const GUTF8String &id, bool sync, DjVuPort * port)
1079 {
1080    check();
1081    DEBUG_MSG("DjVuDocument::get_page(): ID='" << id << "'\n");
1082    DEBUG_MAKE_INDENT(3);
1083 
1084    GP<DjVuImage> dimg;
1085    const GP<DjVuFile> file(get_djvu_file(id));
1086    if(file)
1087    {
1088      dimg=DjVuImage::create(file);
1089      if (port)
1090        DjVuPort::get_portcaster()->add_route(dimg, port);
1091 
1092      file->resume_decode();
1093      if (dimg && sync)
1094        dimg->wait_for_complete_decode();
1095    }
1096    return dimg;
1097 }
1098 
// Makes one pass over the pending thumbnail requests (threqs_list).
// A request is served either from a predecoded thumbnail file
// (req->thumb_file) or by rendering the page itself (req->image_file).
// Requests that were completed or abandoned during the pass are removed
// from the list; in both cases their data pool has been marked EOF.
void
DjVuDocument::process_threqs(void)
      // Will look thru threqs_list and try to fulfil every request
{
  // threqs_lock is held for the whole pass.
  GCriticalSectionLock lock(&threqs_lock);
  // No increment in the for-header: 'pos' is advanced at the bottom so
  // that the current element can be deleted safely.
  for(GPosition pos=threqs_list;pos;)
  {
    GP<ThumbReq> req=threqs_list[pos];
    bool remove=false;
    if (req->thumb_file)
    {
      G_TRY {
	       // There is supposed to be a file with thumbnails
        if (req->thumb_file->is_data_present())
        {
          // Cool, we can extract the thumbnail now
          GP<ByteStream> str=req->thumb_file->get_init_data_pool()->get_stream();
          GP<IFFByteStream> giff=IFFByteStream::create(str);
          IFFByteStream &iff=*giff;
          GUTF8String chkid;
          if (!iff.get_chunk(chkid) || chkid!="FORM:THUM")
            G_THROW( ERR_MSG("DjVuDocument.bad_thumb") );
          // Skip the chunks that precede the requested one
          for(int i=0;i<req->thumb_chunk;i++)
          {
            if (!iff.get_chunk(chkid))
              G_THROW( ERR_MSG("DjVuDocument.bad_thumb") );
            iff.close_chunk();
          }
          if (!iff.get_chunk(chkid) || chkid!="TH44")
            G_THROW( ERR_MSG("DjVuDocument.bad_thumb") );

          // Copy the data
          char buffer[1024];
          int length;
          while((length=iff.read(buffer, 1024)))
            req->data_pool->add_data(buffer, length);
          req->data_pool->set_eof();

          // Also add this file to cache so that we won't have
          // to download it next time
          add_to_cache(req->thumb_file);
          req->thumb_file=0;
          req->image_file=0;
          remove=true;
        }
      } G_CATCH(exc) {
        GUTF8String msg= ERR_MSG("DjVuDocument.cant_extract") "\n";
        msg+=exc.get_cause();
        get_portcaster()->notify_error(this, msg);
	       // Switch this request to the "decoding" mode
	       // NOTE(review): the data pool is marked EOF and the request is
	       // flagged for removal here, yet image_file is set, so the
	       // image branch below still runs once in this same pass.
	       // Confirm this is the intended fallback behavior.
        req->image_file=get_djvu_file(req->page_num);
        req->thumb_file=0;
        req->data_pool->set_eof();
        remove=true;
      } G_ENDCATCH;
    } // if (req->thumb_file)

    if (req->image_file)
    {
      G_TRY {
	       // Decode the file if necessary. Or just used predecoded image.
        GSafeFlags & file_flags=req->image_file->get_safe_flags();
        {
          // Lock the file's flags so its decode state cannot change
          // between the checks below.
          GMonitorLock lock(&file_flags);
          if (!req->image_file->is_decoding())
          {
            if (req->image_file->is_decode_ok())
            {
              // We can generate it now
              const GP<DjVuImage> dimg(DjVuImage::create(req->image_file));

              dimg->wait_for_complete_decode();

              // Fall back to 160x160 when the image reports a zero dimension
              int width = 160;
              int height = 160;

              if( dimg->get_width() )
                width = dimg->get_width();
              if( dimg->get_height() )
                height = dimg->get_height();

              // Thumbnail is 160 wide; height keeps the aspect ratio
              GRect rect(0, 0, 160, height*160/width);
              GP<GPixmap> pm=dimg->get_pixmap(rect, rect, thumb_gamma);
              if (!pm)
              {
                // No pixmap: try a bitmap, else use a plain white pixmap
                GP<GBitmap> bm=dimg->get_bitmap(rect, rect, sizeof(int));
                if(bm)
                  pm=GPixmap::create(*bm);
                else
                  pm = GPixmap::create(rect.height(), rect.width(),
                                       &GPixel::WHITE);
              }

              // Store and compress the pixmap
              GP<IW44Image> iwpix=IW44Image::create_encode(*pm);
              GP<ByteStream> gstr=ByteStream::create();
              IWEncoderParms parms;
              parms.slices=97;
              parms.bytes=0;
              parms.decibels=0;
              iwpix->encode_chunk(gstr, parms);
              TArray<char> data=gstr->get_data();

              req->data_pool->add_data((const char *) data, data.size());
              req->data_pool->set_eof();

              req->thumb_file=0;
              req->image_file=0;
              remove=true;
            } else if (req->image_file->is_decode_failed())
            {
              // Unfortunately we cannot decode it
              req->thumb_file=0;
              req->image_file=0;
              req->data_pool->set_eof();
              remove=true;
            } else
            {
              // Neither decoded nor failed: start decoding and leave the
              // request in the list for a later pass.
              req->image_file->start_decode();
            }
          }
        }
      } G_CATCH(exc) {
        GUTF8String msg="Failed to decode thumbnails:\n";
        msg+=exc.get_cause();
        get_portcaster()->notify_error(this, msg);

	       // Get rid of this request
        req->image_file=0;
        req->thumb_file=0;
        req->data_pool->set_eof();
        remove=true;
      } G_ENDCATCH;
    }

    if (remove)
    {
      // Advance first, then delete the element we just processed
      GPosition this_pos=pos;
      ++pos;
      threqs_list.del(this_pos);
    } else ++pos;
  }
}
1242 
1243 GP<DjVuDocument::ThumbReq>
add_thumb_req(const GP<ThumbReq> & thumb_req)1244 DjVuDocument::add_thumb_req(const GP<ThumbReq> & thumb_req)
1245       // Will look through the list of pending requests for thumbnails
1246       // and try to add the specified request. If a duplicate is found,
1247       // it will be returned and the list will not be modified
1248 {
1249    GCriticalSectionLock lock(&threqs_lock);
1250    for(GPosition pos=threqs_list;pos;++pos)
1251    {
1252       GP<ThumbReq> req=threqs_list[pos];
1253       if (req->page_num==thumb_req->page_num)
1254 	 return req;
1255    }
1256    threqs_list.append(thumb_req);
1257    return thumb_req;
1258 }
1259 
1260 GList<GUTF8String>
get_id_list(void)1261 DjVuDocument::get_id_list(void)
1262 {
1263   GList<GUTF8String> ids;
1264   if (is_init_complete())
1265   {
1266     if(djvm_dir)
1267     {
1268       GPList<DjVmDir::File> files_list=djvm_dir->get_files_list();
1269       for(GPosition pos=files_list;pos;++pos)
1270       {
1271         ids.append(files_list[pos]->get_load_name());
1272       }
1273     }else
1274     {
1275       const int page_num=get_pages_num();
1276       for(int page=0;page<page_num;page++)
1277       {
1278         ids.append(page_to_url(page).fname());
1279       }
1280     }
1281   }
1282   return ids;
1283 }
1284 
1285 void
map_ids(GMap<GUTF8String,void * > & map)1286 DjVuDocument::map_ids(GMap<GUTF8String,void *> &map)
1287 {
1288   GList<GUTF8String> ids=get_id_list();
1289   for(GPosition pos=ids;pos;++pos)
1290   {
1291     map[ids[pos]]=0;
1292   }
1293 }
1294 
// Returns a DataPool that will receive the thumbnail data for page
// `page_num`, or 0 when no thumbnail can be provided.
//
//   - Returns 0 if the document is not fully initialized.
//   - If a request for the page is already pending, its pool is returned.
//   - For BUNDLED and INDIRECT documents a predecoded thumbnail file is
//     looked up in the DJVM directory first.
//   - Otherwise the page itself is used. `dont_decode` is passed to
//     get_djvu_file() as its "don't create" argument, and when it is true
//     a request is only queued if the file is already decoded OK.
GP<DataPool>
DjVuDocument::get_thumbnail(int page_num, bool dont_decode)
{
   DEBUG_MSG("DjVuDocument::get_thumbnail(): page_num=" << page_num << "\n");
   DEBUG_MAKE_INDENT(3);

   if (!is_init_complete()) return 0;

   {
	 // See if we already have request for this thumbnail pending
      GCriticalSectionLock lock(&threqs_lock);
      for(GPosition pos=threqs_list;pos;++pos)
      {
	 GP<ThumbReq> req=threqs_list[pos];
	 if (req->page_num==page_num)
	    return req->data_pool;	// That's it. Just return it.
      }
   }

      // No pending request for this page... Create one
   GP<ThumbReq> thumb_req=new ThumbReq(page_num, DataPool::create());

      // First try to find predecoded thumbnail
   if (get_doc_type()==INDIRECT || get_doc_type()==BUNDLED)
   {
	 // Predecoded thumbnails exist for new formats only
      GPList<DjVmDir::File> files_list=djvm_dir->get_files_list();
      GP<DjVmDir::File> thumb_file;
      int thumb_start=0;
      int page_cnt=-1;
	 // Walk the directory in order, remembering the most recent
	 // thumbnails file and the number of the first page that follows
	 // it, until the requested page is reached.
      for(GPosition pos=files_list;pos;++pos)
      {
	 GP<DjVmDir::File> f=files_list[pos];
	 if (f->is_thumbnails())
	 {
	    thumb_file=f;
	    thumb_start=page_cnt+1;
	 } else if (f->is_page())
         {
           page_cnt++;
         }
	 if (page_cnt==page_num) break;
      }
      if (thumb_file)
      {
	    // That's the file with the desired thumbnail image
	 thumb_req->thumb_file=get_djvu_file(thumb_file->get_load_name());
	    // Index of the wanted thumbnail inside that file
	 thumb_req->thumb_chunk=page_num-thumb_start;
	    // add_thumb_req() may hand back an already pending duplicate
	 thumb_req=add_thumb_req(thumb_req);
	 process_threqs();
	 return thumb_req->data_pool;
      }
   }

      // Apparently we're out of luck and need to decode the requested
      // page (unless it's already done and if it's allowed) and render
      // it into the thumbnail. If dont_decode is true, do not attempt
      // to create this file (because this will result in a request for data)
   GP<DjVuFile> file=get_djvu_file(page_num, dont_decode);
   if (file)
   {
      thumb_req->image_file=file;

	 // I'm locking the flags here to make sure, that DjVuFile will not
	 // change its state in between of the checks.
      GSafeFlags & file_flags=file->get_safe_flags();
      {
	 GMonitorLock lock(&file_flags);
	 if (thumb_req->image_file->is_decode_ok() || !dont_decode)
	 {
	       // Just add it to the list and call process_threqs(). It
	       // will start decoding if necessary
	    thumb_req=add_thumb_req(thumb_req);
	    process_threqs();
	 } else
	 {
	       // Nothing can be done return ZERO
	    thumb_req=0;
	 }
      }
   } else thumb_req=0;

   if (thumb_req) return thumb_req->data_pool;
   else return 0;
}
1380 
1381 static void
add_to_cache(const GP<DjVuFile> & f,GMap<GURL,void * > & map,DjVuFileCache * cache)1382 add_to_cache(const GP<DjVuFile> & f, GMap<GURL, void *> & map,
1383 	     DjVuFileCache * cache)
1384 {
1385    GURL url=f->get_url();
1386    DEBUG_MSG("DjVuDocument::add_to_cache(): url='" << url << "'\n");
1387    DEBUG_MAKE_INDENT(3);
1388 
1389    if (!map.contains(url))
1390    {
1391       map[url]=0;
1392       cache->add_file(f);
1393 
1394       GPList<DjVuFile> list;
1395       for(GPosition pos=list;pos;++pos)
1396 	 add_to_cache(list[pos], map, cache);
1397    }
1398 }
1399 
1400 void
add_to_cache(const GP<DjVuFile> & f)1401 DjVuDocument::add_to_cache(const GP<DjVuFile> & f)
1402 {
1403    if (cache)
1404    {
1405       GMap<GURL, void *> map;
1406       ::add_to_cache(f, map, cache);
1407    }
1408 }
1409 
1410 void
notify_file_flags_changed(const DjVuFile * source,long set_mask,long clr_mask)1411 DjVuDocument::notify_file_flags_changed(const DjVuFile * source,
1412 					long set_mask, long clr_mask)
1413 {
1414       // Don't check here if the document is initialized or not.
1415       // This function may be called when it's not.
1416       // check();
1417    if (set_mask & DjVuFile::DECODE_OK)
1418    {
1419       set_file_aliases(source);
1420       if (cache) add_to_cache((DjVuFile *) source);
1421       if(!needs_compression_flag)
1422       {
1423         if(source->needs_compression())
1424         {
1425           can_compress_flag=true;
1426           needs_compression_flag=true;
1427         }else if(source->can_compress())
1428         {
1429           can_compress_flag=true;
1430         }
1431       }
1432    }
1433    process_threqs();
1434 }
1435 
1436 GP<DjVuFile>
id_to_file(const DjVuPort * source,const GUTF8String & id)1437 DjVuDocument::id_to_file(const DjVuPort * source, const GUTF8String &id)
1438 {
1439    return (DjVuFile *) get_djvu_file(id);
1440 }
1441 
// Portcaster request: returns a DataPool with the data for `url`, or an
// empty pointer when this document cannot supply it (yet).
//
//   - The document's own URL maps to init_data_pool.
//   - A URL registered in the "UnnamedFiles" list gets a fresh, empty
//     DataPool that is remembered in the record.
//   - Otherwise, once the document type is known, the data is located
//     according to that type. For the bundled formats the pool is a
//     slice of init_data_pool; for the other formats a pool is created
//     from the URL itself if it is a local file URL.
//
// Throws (G_THROW) when a bundled document is asked for a URL outside
// of it or for a file missing from its directory, and when an INDIRECT
// document is asked for a file that is not in its directory.
GP<DataPool>
DjVuDocument::request_data(const DjVuPort * source, const GURL & url)
{
   DEBUG_MSG("DjVuDocument::request_data(): seeing if we can do it\n");
   DEBUG_MAKE_INDENT(3);

   if (url==init_url)
     return init_data_pool;

   check();	// Don't put it before 'init_data_pool'

   {
	 // See if there is a file in the "UnnamedFiles" list.
	 // If it's there, then create an empty DataPool and store its
	 // pointer in the list. The "init thread" will eventually
	 // do smth with it.
      GCriticalSectionLock lock(&ufiles_lock);
      for(GPosition pos=ufiles_list;pos;++pos)
      {
	 GP<UnnamedFile> f=ufiles_list[pos];
	 if (f->url==url)
	 {
	    DEBUG_MSG("Found tmp unnamed DjVuFile. Return empty DataPool\n");
	       // Remember the DataPool. We will connect it to the
	       // actual data after the document structure becomes known
	    f->data_pool=DataPool::create();
	    return f->data_pool;
	 }
      }
   }

      // Well, the url is not in the "UnnamedFiles" list, but it doesn't
      // mean, that it's not "artificial". Stay alert!
   GP<DataPool> data_pool;
   if (flags & DOC_TYPE_KNOWN)
      switch(doc_type)
      {
	 case OLD_BUNDLED:
	 {
	       // Nothing can be located until the directory is known
	    if (flags & DOC_DIR_KNOWN)
	    {
	       DEBUG_MSG("The document is in OLD_BUNDLED format\n");
	       if (url.base()!=init_url)
		        G_THROW( ERR_MSG("DjVuDocument.URL_outside") "\t"+url.get_string());

	       GP<DjVmDir0::FileRec> file=djvm_dir0->get_file(url.fname());
	       if (!file)
               {
                 G_THROW( ERR_MSG("DjVuDocument.file_outside") "\t"+url.fname());
               }
	          // The file's data is a slice of the document's own data
	       data_pool=DataPool::create(init_data_pool, file->offset, file->size);
	    }
	    break;
	 }
	 case BUNDLED:
	 {
	       // Nothing can be located until the directory is known
	    if (flags & DOC_DIR_KNOWN)
	    {
	       DEBUG_MSG("The document is in new BUNDLED format\n");
	       if (url.base()!=init_url)
               {
		 G_THROW( ERR_MSG("DjVuDocument.URL_outside") "\t"
                   +url.get_string());
               }

	       GP<DjVmDir::File> file=djvm_dir->id_to_file(url.fname());
	       if (!file)
               {
                 G_THROW( ERR_MSG("DjVuDocument.file_outside") "\t"+url.fname());
               }
	          // The file's data is a slice of the document's own data
	       data_pool=DataPool::create(init_data_pool, file->offset, file->size);
	    }
	    break;
	 }
	 case SINGLE_PAGE:
	 case OLD_INDEXED:
	 case INDIRECT:
	 {
	    DEBUG_MSG("The document is in SINGLE_PAGE or OLD_INDEXED or INDIRECT format\n");
	       // Only INDIRECT documents have a directory to validate against
	    if (flags & DOC_DIR_KNOWN)
	       if (doc_type==INDIRECT && !djvm_dir->id_to_file(url.fname()))
		        G_THROW( ERR_MSG("DjVuDocument.URL_outside2") "\t"+url.get_string());

	       // Only local file URLs are served here; for any other URL
	       // data_pool stays empty.
	    if (url.is_local_file_url())
	    {
//	       GUTF8String fname=GOS::url_to_filename(url);
//	       if (GOS::basename(fname)=="-") fname="-";
	       DEBUG_MSG("url=" << url << "\n");

	       data_pool=DataPool::create(url);
	    }
	 }
      }
   return data_pool;
}
1537 
1538 
1539 static void
add_file_to_djvm(const GP<DjVuFile> & file,bool page,DjVmDoc & doc,GMap<GURL,void * > & map)1540 add_file_to_djvm(const GP<DjVuFile> & file, bool page,
1541 		 DjVmDoc & doc, GMap<GURL, void *> & map)
1542       // This function is used only for obsolete formats.
1543       // For new formats there is no need to process files recursively.
1544       // All information is already available from the DJVM chunk
1545 {
1546    GURL url=file->get_url();
1547 
1548    if (!map.contains(url))
1549    {
1550       map[url]=0;
1551 
1552       if (file->get_chunks_number()>0 && !file->contains_chunk("NDIR"))
1553       {
1554 	    // Get the data and unlink any file containing NDIR chunk.
1555 	    // Yes. We're lazy. We don't check if those files contain
1556 	    // anything else.
1557 	 GPosition pos;
1558 	 GPList<DjVuFile> files_list=file->get_included_files(false);
1559 	 GP<DataPool> data=file->get_djvu_data(false);
1560 	 for(pos=files_list;pos;++pos)
1561 	 {
1562 	    GP<DjVuFile> f=files_list[pos];
1563 	    if (f->contains_chunk("NDIR"))
1564 	       data=DjVuFile::unlink_file(data, f->get_url().fname());
1565 	 }
1566 
1567 	    // Finally add it to the document
1568 	 GUTF8String name=file->get_url().fname();
1569 	 GP<DjVmDir::File> file_rec=DjVmDir::File::create(
1570            name, name, name,
1571            page ? DjVmDir::File::PAGE : DjVmDir::File::INCLUDE );
1572 	 doc.insert_file(file_rec, data, -1);
1573 
1574 	    // And repeat for all included files
1575 	 for(pos=files_list;pos;++pos)
1576 	    add_file_to_djvm(files_list[pos], false, doc, map);
1577       }
1578    }
1579 }
1580 
1581 static void
add_file_to_djvm(const GP<DjVuFile> & file,bool page,DjVmDoc & doc,GMap<GURL,void * > & map,bool & needs_compression_flag,bool & can_compress_flag)1582 add_file_to_djvm(const GP<DjVuFile> & file, bool page,
1583 		 DjVmDoc & doc, GMap<GURL, void *> & map,
1584                  bool &needs_compression_flag, bool &can_compress_flag )
1585 {
1586   if(!needs_compression_flag)
1587   {
1588     if(file->needs_compression())
1589     {
1590       can_compress_flag=true;
1591       needs_compression_flag=true;
1592     }else if(file->can_compress())
1593     {
1594       can_compress_flag=true;
1595     }
1596   }
1597   add_file_to_djvm(file,page,doc,map);
1598 }
1599 
1600 static void
local_get_url_names(DjVuFile * f,const GMap<GURL,void * > & map,GMap<GURL,void * > & tmpmap)1601 local_get_url_names(DjVuFile * f,const GMap<GURL, void *> & map,GMap<GURL,void *> &tmpmap)
1602 {
1603    GURL url=f->get_url();
1604    if (!map.contains(url) && !tmpmap.contains(url))
1605    {
1606       tmpmap[url]=0;
1607       f->process_incl_chunks();
1608       GPList<DjVuFile> files_list=f->get_included_files(false);
1609       for(GPosition pos=files_list;pos;++pos)
1610          local_get_url_names(files_list[pos], map, tmpmap);
1611    }
1612 }
1613 
1614 static void
local_get_url_names(DjVuFile * f,GMap<GURL,void * > & map)1615 local_get_url_names(DjVuFile * f, GMap<GURL, void *> & map)
1616 {
1617    GMap<GURL,void *> tmpmap;
1618    local_get_url_names(f,map,tmpmap);
1619    for(GPosition pos=tmpmap;pos;++pos)
1620      map[tmpmap.key(pos)]=0;
1621 }
1622 
// Returns the list of local file URLs that make up the document.
// The list is computed on the first call and stored in url_names
// (has_url_names marks it as valid); access is guarded by
// url_names_lock.
//
// For BUNDLED and INDIRECT documents the URLs come from the DJVM
// directory. For the other formats every page is visited together with
// the files it includes; a page that raises an exception is reported
// through notify_error() and left out.
GList<GURL>
DjVuDocument::get_url_names(void)
{
  check();

  GCriticalSectionLock lock(&url_names_lock);
  if(has_url_names)
    return url_names;

  // Used as a set of URLs: the values are never read
  GMap<GURL, void *> map;
  int i;
  if (doc_type==BUNDLED || doc_type==INDIRECT)
  {
    GPList<DjVmDir::File> files_list=djvm_dir->get_files_list();
    for(GPosition pos=files_list;pos;++pos)
    {
      GURL url=id_to_url(files_list[pos]->get_load_name());
      map[url]=0;
    }
  }else
  {
    int pages_num=get_pages_num();
    for(i=0;i<pages_num;i++)
    {
      G_TRY
      {
        local_get_url_names(get_djvu_file(i), map);
      }
      G_CATCH(ex)
      {
        // Why is this try/catch block here?
        // NOTE(review): the inner handler only rethrows, so an exception
        // raised while reporting the error propagates to the caller.
        G_TRY {
          get_portcaster()->notify_error(this, ex.get_cause());
          // Pages are reported 1-based in the message
          GUTF8String emsg = ERR_MSG("DjVuDocument.exclude_page") "\t" + GUTF8String(i+1);
          get_portcaster()->notify_error(this, emsg);
        }
        G_CATCH_ALL
        {
          G_RETHROW;
        }
        G_ENDCATCH;
      }
      G_ENDCATCH;
    }
  }
  // Keep only the URLs that refer to local files
  for(GPosition j=map;j;++j)
  {
    if (map.key(j).is_local_file_url())
    {
      url_names.append(map.key(j));
    }
  }
  has_url_names=true;
  return url_names;
}
1678 
// Builds a DjVmDoc holding all files of this document.
// Throws "DjVuDocument.init_not_done" if initialization has not
// completed.
//
//   - BUNDLED / INDIRECT: every directory record is copied and inserted
//     together with the file's data (re-generated if the file was
//     modified, the initial data pool otherwise); the navigation data is
//     carried over when present.
//   - SINGLE_PAGE: page 0 and the files it includes are added.
//   - Old formats: every page listed in `ndir` is added. Unless
//     recover_errors is ABORT, a failing page is reported through
//     notify_error() and skipped.
GP<DjVmDoc>
DjVuDocument::get_djvm_doc()
      // This function may block for data
{
   check();
   DEBUG_MSG("DjVuDocument::get_djvm_doc(): creating the DjVmDoc\n");
   DEBUG_MAKE_INDENT(3);

   if (!is_init_complete())
     G_THROW( ERR_MSG("DjVuDocument.init_not_done") );

   GP<DjVmDoc> doc=DjVmDoc::create();

   if (doc_type==BUNDLED || doc_type==INDIRECT)
     {
       GPList<DjVmDir::File> files_list=djvm_dir->get_files_list();
       for(GPosition pos=files_list;pos;++pos)
         {
           // Insert a copy of the record, not the document's own one
           GP<DjVmDir::File> f=new DjVmDir::File(*files_list[pos]);
           // NOTE(review): 'file' is dereferenced below without a null
           // check -- confirm url_to_file() cannot return 0 here.
           GP<DjVuFile> file=url_to_file(id_to_url(f->get_load_name()));
           GP<DataPool> data;
           if (file->is_modified())
             data=file->get_djvu_data(false);
           else
             data=file->get_init_data_pool();
           doc->insert_file(f, data);
         }
       if (djvm_nav)
         doc->set_djvm_nav(djvm_nav);
     }
   else if (doc_type==SINGLE_PAGE)
     {
       DEBUG_MSG("Creating: djvm for a single page document.\n");
       // Set of URLs already added to 'doc'
       GMap<GURL, void *> map_add;
       GP<DjVuFile> file=get_djvu_file(0);
       add_file_to_djvm(file, true, *doc, map_add,
                        needs_compression_flag,can_compress_flag);
     }
   else
     {
       DEBUG_MSG("Converting: the document is in an old format.\n");
       // Set of URLs already added to 'doc'
       GMap<GURL, void *> map_add;
       if(recover_errors == ABORT)
         {
           // No recovery: an exception from any page propagates
           for(int page_num=0;page_num<ndir->get_pages_num();page_num++)
             {
               GP<DjVuFile> file=url_to_file(ndir->page_to_url(page_num));
               add_file_to_djvm(file, true, *doc, map_add,
                                needs_compression_flag,can_compress_flag);
             }
         }
       else
         {
           // Recovery: report the failing page and continue with the next
           for(int page_num=0;page_num<ndir->get_pages_num();page_num++)
             {
               G_TRY
                 {
                   GP<DjVuFile> file=url_to_file(ndir->page_to_url(page_num));
                   add_file_to_djvm(file, true, *doc, map_add,
                                    needs_compression_flag,can_compress_flag);
                 }
               G_CATCH(ex)
                 {
                   G_TRY {
                     get_portcaster()->notify_error(this, ex.get_cause());
                     // Pages are reported 1-based in the message
                     GUTF8String emsg = ERR_MSG("DjVuDocument.skip_page") "\t"
                                      + GUTF8String(page_num+1);
                     get_portcaster()->notify_error(this, emsg);
                   }
                   G_CATCH_ALL
                     {
                       G_RETHROW;
                     }
                   G_ENDCATCH;
                 }
               G_ENDCATCH;
             }
         }
     }
   return doc;
}
1760 
1761 void
write(const GP<ByteStream> & gstr,const GMap<GUTF8String,void * > & reserved)1762 DjVuDocument::write( const GP<ByteStream> &gstr,
1763   const GMap<GUTF8String,void *> &reserved)
1764 {
1765   DEBUG_MSG("DjVuDocument::write(): storing DjVmDoc into ByteStream\n");
1766   DEBUG_MAKE_INDENT(3);
1767   get_djvm_doc()->write(gstr,reserved);
1768 }
1769 
1770 void
write(const GP<ByteStream> & gstr,bool force_djvm)1771 DjVuDocument::write(const GP<ByteStream> &gstr, bool force_djvm)
1772 {
1773   DEBUG_MSG("DjVuDocument::write(): storing DjVmDoc into ByteStream\n");
1774   DEBUG_MAKE_INDENT(3);
1775 
1776   GP<DjVmDoc> doc=get_djvm_doc();
1777   GP<DjVmDir> dir=doc->get_djvm_dir();
1778 
1779   bool singlepage = (dir->get_files_num()==1 && !djvm_nav && !force_djvm);
1780   if (singlepage)
1781   {
1782     // maybe save as single page
1783     DjVmDir::File *file = dir->page_to_file(0);
1784     if (file->get_title() != file->get_load_name())
1785       singlepage = false;
1786   }
1787   if (! singlepage)
1788   {
1789     doc->write(gstr);
1790   }
1791   else
1792   {
1793     GPList<DjVmDir::File> files_list=dir->resolve_duplicates(false);
1794     GP<DataPool> pool=doc->get_data(files_list[files_list]->get_load_name());
1795     GP<ByteStream> pool_str=pool->get_stream();
1796     ByteStream &str=*gstr;
1797     str.writall(octets,4);
1798     str.copy(*pool_str);
1799   }
1800 }
1801 
1802 void
expand(const GURL & codebase,const GUTF8String & idx_name)1803 DjVuDocument::expand(const GURL &codebase, const GUTF8String &idx_name)
1804 {
1805    DEBUG_MSG("DjVuDocument::expand(): codebase='" << codebase << "'\n");
1806    DEBUG_MAKE_INDENT(3);
1807 
1808    GP<DjVmDoc> doc=get_djvm_doc();
1809    doc->expand(codebase, idx_name);
1810 }
1811 
1812 void
save_as(const GURL & where,bool bundled)1813 DjVuDocument::save_as(const GURL &where, bool bundled)
1814 {
1815    DEBUG_MSG("DjVuDocument::save_as(): where='" << where <<
1816 	     "', bundled=" << bundled << "\n");
1817    DEBUG_MAKE_INDENT(3);
1818 
1819    if (needs_compression())
1820    {
1821      if(!djvu_compress_codec)
1822      {
1823        G_THROW( ERR_MSG("DjVuDocument.comp_codec") );
1824      }
1825      GP<ByteStream> gmbs=ByteStream::create();
1826      write(gmbs);
1827      ByteStream &mbs=*gmbs;
1828      mbs.flush();
1829      mbs.seek(0,SEEK_SET);
1830      (*djvu_compress_codec)(gmbs,where,bundled);
1831    }else if (bundled)
1832    {
1833       DataPool::load_file(where);
1834       write(ByteStream::create(where, "wb"));
1835    } else
1836    {
1837      expand(where.base(), where.fname());
1838    }
1839 }
1840 
// Fixed fragments framing the output of DjVuDocument::writeDjVuXML():
// 'prolog' opens the document up to and including <HEAD>, 'start_xml'
// closes the head and opens the body, 'end_xml' closes body and document.
static const char prolog[]="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DjVuXML PUBLIC \"-//W3C//DTD DjVuXML 1.1//EN\" \"pubtext/DjVuXML-s.dtd\">\n<DjVuXML>\n<HEAD>";
static const char start_xml[]="</HEAD>\n<BODY>\n";
static const char end_xml[]="</BODY>\n</DjVuXML>\n";
1844 
1845 void
writeDjVuXML(const GP<ByteStream> & gstr_out,int flags,int page) const1846 DjVuDocument::writeDjVuXML(const GP<ByteStream> &gstr_out,
1847                            int flags, int page) const
1848 {
1849   ByteStream &str_out=*gstr_out;
1850   str_out.writestring(
1851     prolog+get_init_url().get_string().toEscaped()+start_xml);
1852   const int pages=wait_get_pages_num();
1853   int pstart = (page < 0) ? 0 : page;
1854   int pend = (page < 0) ? pages : page+1;
1855   for(int page_num=pstart; page_num<pend; ++page_num)
1856   {
1857     const GP<DjVuImage> dimg(get_page(page_num,true));
1858     if(!dimg)
1859     {
1860       G_THROW( ERR_MSG("DjVuToText.decode_failed") );
1861     }
1862     dimg->writeXML(str_out,get_init_url(),flags);
1863   }
1864   str_out.writestring(GUTF8String(end_xml));
1865 }
1866 
1867 
1868 #ifdef HAVE_NAMESPACES
1869 }
1870 # ifndef NOT_USING_DJVU_NAMESPACE
1871 using namespace DJVU;
1872 # endif
1873 #endif
1874