1package DocSet::DocSet;
2
3use strict;
4use warnings;
5
6use DocSet::Util;
7use DocSet::RunTime;
8use DocSet::Cache ();
9use DocSet::Doc ();
10use DocSet::NavigateCache ();
11
12use File::Spec::Functions;
13
14use vars qw(@ISA);
15use DocSet::Config ();
16@ISA = qw(DocSet::Config);
17
18########
# Constructor.
#
#   $class->new($config_file, $parent_o, $src_rel_dir);
#
# May be invoked on a class name or on an existing object; in the
# latter case the new object is blessed into the invocant's class.
# All the remaining arguments are handed over to init().
sub new {
    my ($proto, @args) = @_;

    my $class = ref($proto) || $proto;
    my $self  = bless {}, $class;

    $self->init(@args);

    return $self;
}
25
# Initialize a freshly blessed docset object.
#
#   $self->init($config_file, $parent_o, $src_rel_dir);
#
# Reads the config file, links the object to its parent docset (if a
# parent object was passed) and resets the 'modified' flag.
sub init {
    my $self = shift;
    my ($config_file, $parent_o, $src_rel_dir) = @_;

    $self->read_config($config_file, $parent_o);

    # a nested docset remembers its parent and merges the config
    # relative to its location inside the parent
    my $is_nested = $parent_o && ref($parent_o);
    if ($is_nested) {
        $self->{parent_o} = $parent_o;
        $self->merge_config($src_rel_dir);
    }

    # A docset starts in the 'not modified' state: the assumption is
    # that nothing has changed since the last run. The scan flips the
    # flag when one of the following is detected:
    #
    # 1. an included docset is 'modified'
    # 2. an included chapter is 'modified'
    # 3. one of the included 'copy as-is' files is 'modified'
    # 4. config.cfg is newer than the corresponding index.html
    # 5. the cache file is missing
    #
    # It's the responsibility of the modified object to set its
    # parent docset status to 'modified'.
    $self->modified(0);
}
54
# First pass: scan the docset.
#
#   $self->scan();
#
# - opens (or purges, under 'rebuild_all') the per-output-mode cache
#   file "<src_root>/cache.<tmpl_mode>.dat" and attaches it to the
#   docset object
# - caches the meta data of the index node and the location of the
#   parent's cache (if there is a parent docset)
# - scans the visible and the hidden nodes, caching their data and
#   storing the objects that need to be rendered later
# - marks the docset as 'modified' if any child docset or chapter is
#   modified, or if the config file or the body top/bot components are
#   newer than the generated index.html
# - writes the cache and collects the 'copy as-is' files that should
#   be copied during the render phase
sub scan {
    my ($self) = @_;

    my $src_root = $self->get_dir('src_root');
    my $purge = DocSet::RunTime::get_opts('rebuild_all') ? 1 : 0;
    my $update = 1; # see DocSet::Cache::new
    # each output mode need its own cache, because of the destination
    # links which are different
    my $mode = $self->get('tmpl_mode');
    my $cache_file = "$src_root/cache.$mode.dat";

    # - create the new cache object for updates
    # - rebuild_all forces the existing cache's purge
    my $cache = DocSet::Cache->new($cache_file, $update, $purge);
    $self->cache($cache); # add to the docset object

    # a complete rebuild of the docset is done when:
    # - we are told to do so:
    # - if the cache file doesn't exist
    # - or we failed to retrieve an existing cache
    if (DocSet::RunTime::get_opts('rebuild_all') ||
        $cache->read_error || !$cache->can_read) {
        $self->modified(1);
        $self->rebuild(1);
    }

    # cache the index node meta data
    $cache->index_node(id       => $self->get('id'),
                       stitle   => $self->get('stitle'),
                       title    => $self->get('title'),
                       abstract => $self->get('abstract'),
                       extra    => $self->get('extra'),
                      );

    # croaks if the docset id is duplicated
    $self->check_duplicated_docset_ids();

    # cache the location of the parent node cache
    if (my $parent_o = $self->get('parent_o')) {
        my $parent_src_root   = $parent_o->get_dir('src_root');
        # the path of this docset relative to the parent's source root
        (my $rel2parent_src_root = $src_root) =~ s|\Q$parent_src_root||;
        # one '..' per '/' found in that relative path
        my $rel_dir = join '/', ("..") x ($rel2parent_src_root =~ tr|/|/|);
        my $parent_cache_path = "$parent_src_root/cache.$mode.dat";
        $cache->parent_node($parent_cache_path,
                            $self->get('id'),
                            $rel_dir);
        $self->set_dir(rel_parent_root => $rel_dir);
    }
    else {
        $self->set_dir(rel_parent_root => '.');
    }

    ###
    # scan the nodes of the current level and cache the meta and other
    # data

    my $hidden = 0;
    my @nodes_by_type = @{ $self->nodes_by_type };
    while (@nodes_by_type) {
        my ($type, $data) = splice @nodes_by_type, 0, 2;
        if ($type eq 'docsets') {
            my $docset = $self->docset_scan_n_cache($data, $hidden);
            if (defined $docset and ref $docset) {
                # modified child docset --> modified docset
                $self->modified(1) if $docset->modified();
                $self->object_store($docset);
            }
        }
        elsif ($type eq 'chapters') {
            my $chapter = $self->chapter_scan_n_cache($data, $hidden);
            if (defined $chapter and ref $chapter) {
                # modified chapter --> modified docset
                $self->modified(1);
                $self->object_store($chapter);
            }
        }
        elsif ($type eq 'links') {
            # links are only cached, there is nothing to render
            $self->link_scan_n_cache($data, $hidden);
        }
        elsif ($type eq 'sitemap') {
            # the sitemap data is only cached here, no object is stored
            $self->sitemap_cache($data, $hidden);
        }
        else {
            # nothing
        }
    }

    # the same but for the hidden objects
    $hidden = 1;
    my @hidden_nodes_by_type = @{ $self->hidden_nodes_by_type };
    while (@hidden_nodes_by_type) {
        my ($type, $data) = splice @hidden_nodes_by_type, 0, 2;
        if ($type eq 'docsets') {
            my $docset = $self->docset_scan_n_cache($data, $hidden);
            if (defined $docset and ref $docset) {
                # a hidden child docset is rendered only via this
                # docset's render(), which is a no-op unless this
                # docset is 'modified', so the state must be
                # propagated exactly as for the visible docsets
                $self->modified(1) if $docset->modified();
                $self->object_store($docset);
            }
        }
        elsif ($type eq 'chapters') {
            my $chapter = $self->chapter_scan_n_cache($data, $hidden);
            if (defined $chapter and ref $chapter) {
                # modified chapter --> modified docset
                $self->modified(1);
                $self->object_store($chapter);
            }
        }
        else {
            # nothing
        }
    }

    $cache->node_groups($self->node_groups);

    # compare whether the config file is newer than the corresponding
    # index.html
    my $dst_root = $self->get_dir('dst_root');
    my $config_file = $self->{config_file};

    my $dst_index = "$dst_root/index.html";
    my ($config_updated) = $self->should_update($config_file, $dst_index);
    $self->modified(1) if $config_updated;

    # if @body{qw(top bot)} component files exist, check whether they
    # are newer than the target index.html file
    if (my $body = $self->get('body')) {
        for my $sec (qw(top bot)) {
            my $src_file = $body->{$sec};
            next unless $src_file;
            $src_file = catfile $src_root, $src_file;
            my ($sec_updated) = $self->should_update($src_file, $dst_index);
            $self->modified(1) if $sec_updated;
        }
    }

    # sync the cache
    $cache->write;

    # copy non-pod files like images and stylesheets
    #
    # META: though this belongs to the 'render' part, we run it here,
    # since we need to know after the scan() whether the docset is
    # modified. a cleaner, logic-wise, solution would be only to check
    # modification times on files that may need to be copied as-is,
    # but to postpone the copying, if any, only to the render part of
    # the logic. We could also remove here all the files that don't
    # need to be copied, since they didn't change.
    $self->scan_copy_the_rest;

}
204
205
# Create and scan a child docset, then cache its data in this
# docset's cache.
#
#   my $docset = $self->docset_scan_n_cache($src_rel_dir, $hidden);
#
# $src_rel_dir is the child's directory relative to this docset's
# source root; its config is read from "$src_rel_dir/config.cfg".
# Returns the scanned child docset object.
sub docset_scan_n_cache {
    my ($self, $src_rel_dir, $hidden) = @_;

    my $src_root  = $self->get_dir('src_root');
    my $child_dir = "$src_root/$src_rel_dir";

    # the child is an object of the same class as the current docset
    my $docset = $self->new("$child_dir/config.cfg", $self, $src_rel_dir);
    $docset->scan;

    # register the child in the cache and store its meta data
    my $id = $docset->get('id');
    $self->cache->add($id);
    my %meta = (
        stitle   => $docset->get('stitle'),
        title    => $docset->get('title'),
        link     => "$src_rel_dir/index.html",
        abstract => $docset->get('abstract'),
        rel_path => $src_rel_dir,
    );
    $self->cache->set($id, 'meta', \%meta, $hidden);

    # remember where the child's cache file lives: the cache files are
    # linked to each other both ways, so later the nodes can be
    # traversed by reading the cache files alone
    my $mode = $self->get('tmpl_mode');
    $self->cache->set($id, 'child_cache_path', "$child_dir/cache.$mode.dat");

    note "\n"; # mark the end of scan

    return $docset;
}
237
238
239
# Cache the meta data of a link node.
#
#   $self->link_scan_n_cache($link, $hidden);
#
# $link is a hash reference which must include the 'id' key; the id
# is used as the cache key and is not stored in the meta data. A
# missing 'title' is filled from 'stitle' and vice versa. The passed
# hash is not modified.
sub link_scan_n_cache {
    my ($self, $link, $hidden) = @_;

    # work on a copy, the id is not a part of the meta data
    my %meta = %{$link};
    my $id   = delete $meta{id};

    if (!exists $meta{title}) {
        $meta{title} = $meta{stitle};
    }
    if (!exists $meta{stitle}) {
        $meta{stitle} = $meta{title};
    }

    $self->cache->add($id);
    $self->cache->set($id, 'meta', \%meta, $hidden);
}
249
# Cache the meta data of the sitemap node and remember it in the
# docset object.
#
#   $self->sitemap_cache($link, $hidden);
#
# $link is a hash reference which must include the 'id' key. Unlike
# the link nodes, the id is kept in the cached meta data. A missing
# 'title' is filled from 'stitle' and vice versa. The passed hash is
# not modified.
sub sitemap_cache {
    my ($self, $link, $hidden) = @_;

    # work on a copy
    my %meta = %{$link};
    my $id   = $meta{id};

    if (!exists $meta{title}) {
        $meta{title} = $meta{stitle};
    }
    if (!exists $meta{stitle}) {
        $meta{stitle} = $meta{title};
    }

    $self->cache->add($id);
    $self->cache->set($id, 'meta', \%meta, $hidden);

    # this flag is needed later in order to render the doc
    # (see the Config::sitemap method)
    # XXX: consider creating a Sitemap class, so this can be handled
    # generically, like chapters and docsets
    $self->{sitemap} = \%meta;
}
265
# Scan a chapter (a source document) and cache its meta and toc data.
#
#   my $chapter = $self->chapter_scan_n_cache($src_file, $hidden);
#
# $src_file is the path of the document relative to the source root;
# it is also used as the node id in the cache. The destination path is
# the same relative path with the source extension replaced by the
# target extension.
#
# Returns the scanned chapter object if the chapter has to be
# (re)built, and undef if the destination is up to date.
# Dies if the source file extension is missing or unknown.
sub chapter_scan_n_cache {
    my ($self, $src_file, $hidden) = @_;

    my $id = $src_file;
    $self->cache->add($id);

    my $trg_ext = $self->trg_ext();

    my $src_root      = $self->get_dir('src_root');
    my $dst_root      = $self->get_dir('dst_root');
    my $abs_doc_root  = $self->get_dir('abs_doc_root');
    my $src_path      = "$src_root/$src_file";

    my $src_ext = filename_ext($src_file)
        or die "cannot get an extension for $src_file [$src_path]";
    my $src_mime = $self->ext2mime($src_ext)
        or die "unknown extension: $src_ext [$src_path]";
    # the extension is a literal string, not a pattern: quote it so
    # that regex metacharacters in it cannot alter the match
    (my $basename = $src_file) =~ s/\.\Q$src_ext\E$//;

    # destination paths
    my $rel_dst_path = "$basename.$trg_ext";
    $rel_dst_path =~ s|^\./||; # strip the leading './'
    my $dst_path  = "$dst_root/$rel_dst_path";

    # the way back to the doc root: one '..' per directory level
    my $rel_doc_root = $rel_dst_path =~ m|/|
        ? join('/', ("..") x ($rel_dst_path =~ tr|/|/|))
        : '.';

    # push to the list of final chapter paths e.g. used by PS/PDF
    # build, which needs all the non-hidden chapters
    $self->trg_chapters($rel_dst_path) unless $hidden;

    ### to rebuild or not
    my ($should_update, $reason) = $self->should_update($src_path, $dst_path);
    if (!$should_update) {
        note "--- $src_file: skipping ($reason)";
        # explicit undef is kept on purpose: it is the documented
        # "nothing to do" value tested by the callers
        return undef;
    }

    ### init
    note "+++ $src_file: processing ($reason)";
    my $dst_mime = $self->get('dst_mime');
    # the converter class is chosen by the source and destination
    # mime types and loaded on demand
    my $conv_class = $self->conv_class($src_mime, $dst_mime);
    require_package($conv_class);

    my $chapter = $conv_class->new(
         docset         => $self,
         tmpl_mode      => $self->get('tmpl_mode'),
         tmpl_root      => $self->get_dir('tmpl'),
         src_root       => $src_root,
         dst_root       => $dst_root,
         src_uri        => $src_file,
         src_path       => $src_path,
         dst_path       => $dst_path,
         rel_dst_path   => $rel_dst_path,
         rel_doc_root   => $rel_doc_root,
         abs_doc_root   => $abs_doc_root,
         path_from_base => $self->get_dir('path_from_base'),
        );

    $chapter->scan();

    # cache the chapter's meta and toc data
    $self->cache->set($id, 'meta', $chapter->meta, $hidden);
    $self->cache->set($id, 'toc',  $chapter->toc,  $hidden);

    return $chapter;

}
335
336####################
# Find out which of the 'copy as-is' files (images, stylesheets, etc.)
# have to be copied to the destination tree.
#
#   $self->scan_copy_the_rest();
#
# Nothing is copied here: the source => destination pairs of the
# files that need to be copied are stored via files_to_copy() for the
# render phase. The docset is marked as 'modified' if at least one
# file has to be copied.
sub scan_copy_the_rest {
    my ($self) = @_;

    my @candidates = @{ $self->files_to_scan_copy() };
    return unless @candidates;

    my $src_root = $self->get_dir('src_root');
    my $dst_root = $self->get_dir('dst_root');
    note "+++ Scanning the copy as-is files. Comparing $src_root with $dst_root";

    my %to_copy = ();
    for my $src_path (@candidates) {
        # the destination path is the source path with the source
        # root replaced by the destination root
        (my $dst_path = $src_path) =~ s/\Q$src_root/$dst_root/;

        # to rebuild or not to rebuild
        my ($should_update, $reason) =
            $self->should_update($src_path, $dst_path);

        if ($should_update) {
            $self->modified(1); # dirty state
            note "+++ processing $src_path => $dst_path ($reason)";
            $to_copy{$src_path} = $dst_path;
        }
        else {
            note "--- skipping cp $src_path $dst_path ($reason)";
        }
    }

    $self->files_to_copy(\%to_copy);
}
370
# Second pass: render the docset.
#
#   $self->render();
#
# Does nothing unless the docset is 'modified'. Otherwise copies the
# 'copy as-is' files, reads the cache written by scan(), renders all
# the stored objects and finally calls complete().
# Dies if the cache cannot be read.
sub render {
    my ($self) = @_;

    # an unmodified docset doesn't need to be rendered
    return unless $self->modified();

    $self->copy_the_rest;

    # each output mode has its own cache file, because the destination
    # links are different
    my $src_root = $self->get_dir('src_root');
    my $mode     = $self->get('tmpl_mode');
    my $path     = "$src_root/cache.$mode.dat";

    my $cache = DocSet::Cache->new($path);
    if ($cache->read_error) {
        die "Failed to read cache from $path: " . $cache->read_error;
    }

    # render the objects no matter what kind are they
    $_->render($cache) for $self->stored_objects;

    $self->complete;
}
398
399####################
# Copy the 'copy as-is' files selected during the scan phase.
#
#   $self->copy_the_rest();
#
# The source => destination pairs are taken from files_to_copy().
# Does nothing if there are no files to copy.
sub copy_the_rest {
    my ($self) = @_;

    my %dst_path_for = %{ $self->files_to_copy };
    return unless %dst_path_for;

    my $src_root = $self->get_dir('src_root');
    my $dst_root = $self->get_dir('dst_root');
    note "+++ Copying the non-processed files from $src_root to $dst_root";

    for my $src_path (keys %dst_path_for) {
        my $dst_path = $dst_path_for{$src_path};
        note "+++ cp $src_path $dst_path";
        copy_file($src_path, $dst_path);
    }
}
415
416
# An abstract method: a no-op here. It is called at the end of
# render(), after all the stored objects have been rendered.
sub complete {
    return;
}
419
# Die with the passed error message(s), followed by the context in
# which the error has happened (currently the config file of the
# docset).
#
#   $self->error(@messages);
sub error {
    my ($self, @messages) = @_;

    my @context = ("config file: $self->{config_file}");

    my @report;
    push @report, "!!! err: $_\n" for @messages;
    push @report, "in context:\n";
    push @report, "\t$_\n" for @context;

    die @report;
}
431
# Decide whether the destination file has to be (re)generated from the
# source file.
#
#   my ($should_update, $reason) =
#       $self->should_update($src_path, $dst_path);
#   my $should_update = $self->should_update($src_path, $dst_path);
#
# The destination is considered up to date only if it exists and its
# modification time is strictly newer than the one of the source.
#
# In list context returns ($should_update, $reason), where $reason is
# a short human readable explanation ('modified', 'not modified',
# optionally followed by ' / forced'). In scalar context returns only
# the boolean flag. When the docset is in the forced rebuild mode
# (see rebuild()), the flag is always true.
#
# Calls error() (which dies) if the source file doesn't exist.
sub should_update {
    my ($self, $src_path, $dst_path) = @_;

    unless (-e $src_path) {
        $self->error("cannot find $src_path");
    }

    # to rebuild or not to rebuild:
    # -M is the age of the file, so the smaller value is the newer file
    my $not_modified =
        (-e $dst_path and (-M $dst_path) < (-M $src_path)) ? 1 : 0;

    my $reason = $not_modified ? 'not modified' : 'modified';

    my ($should_update, $why) = $self->rebuild()
        ? (1, "$reason / forced")
        : (!$not_modified, $reason);

    # a plain "return (flag, reason)" would yield the reason string in
    # scalar context, which is always true
    return wantarray ? ($should_update, $why) : $should_update;
}
452
4531;
454__END__
455
456=head1 NAME
457
458C<DocSet::DocSet> - An abstract docset generation class
459
460=head1 SYNOPSIS
461
462  use DocSet::DocSet::HTML ();
463  my $docset = DocSet::DocSet::HTML->new($config_file);
464
465  # must start from the abs root
466  chdir $abs_root;
467
468  # must be a relative path to be able to move the generated code from
469  # location to location, without adjusting the links
470  $docset->set_dir(abs_root => ".");
471  $docset->scan;
472  $docset->render;
473
  my ($should_update, $reason) =
      $docset->should_update($src_path, $dst_path);
475
476=head1 DESCRIPTION
477
478C<DocSet::DocSet> processes a docset, which can include other docsets,
documents and links. In the first pass it scans the documents and
other docsets linked to it, and caches this information and the
objects for later use. In the second pass the stored objects are
rendered, and the docset is completed.
483
This class cannot be used on its own and has to be subclassed and
extended by sub-classes which are specific to the input and output
formats of the documents that need to be processed. It handles only
the partial functionality which doesn't require format specific
knowledge.
489
490=head2 METHODS
491
492This class inherits from C<DocSet::Config> and you will find the
493documentation of methods inherited from this class in its pod.
494
495The following "public" methods are implemented in this super-class:
496
497=over
498
499=item * new
500
501  $class->new($config_file, $parent_o, $src_rel_dir);
502
503=item * init
504
505  $self->init($config_file, $parent_o, $src_rel_dir);
506
507=item * scan
508
509  $self->scan();
510
511Scans the docset for meta data and tocs of its items and caches this
512information and the item objects.
513
514=item * scan_copy_the_rest
515
516  $self->scan_copy_the_rest()
517
518Process the files that should be copied as is without processing
519(i.e. images, css files, etc). If any of the items have a timestamp
520newer than the corresponding copy in the target destination, the whole
521docset will be rebuilt.
522
523Only files that were modified will be copied during the render phase.
524
525=item * render
526
527  $self->render();
528
529Calls the render() method of each of the stored objects and creates an
530index page linking all the items.
531
532=item * copy_the_rest
533
534  $self->copy_the_rest()
535
Copies as-is the files which aren't processed (i.e. images, css
files, etc.) and were modified.
538
539=item * should_update
540
  my ($should_update, $reason) =
      $self->should_update($src_path, $dst_path);

Compares the timestamps/existence of the src and dst paths. Returns
(true, reason) if dst is missing or is not newer than src, otherwise
returns (false, reason). Raises an error if src doesn't exist.

If the docset is in the forced rebuild mode (e.g. the rebuild_all
runtime option is on), this always returns (true, reason).
548
549=back
550
551=head2 ABSTRACT METHODS
552
553The following methods should be implemented by the sub-classes.
554
555=over
556
557=item * parse
558
559=item * retrieve_meta_data
560
561=item * convert
562
563=item * complete
564
565  $self->complete();
566
Put here anything that should be run after all the items have been
rendered and all the meta info has been collected, e.g. the generation
of the I<index> file, which links to all the items and to the parent
node if such exists.
571
572=back
573
574=head1 AUTHORS
575
576Stas Bekman E<lt>stas (at) stason.orgE<gt>
577
578=cut
579