package DocSet::DocSet;

use strict;
use warnings;

use DocSet::Util;
use DocSet::RunTime;
use DocSet::Cache ();
use DocSet::Doc ();
use DocSet::NavigateCache ();

use File::Spec::Functions;

use vars qw(@ISA);
use DocSet::Config ();
@ISA = qw(DocSet::Config);

########
# Constructor. Blesses an empty hash into the invocant's class (it can
# be called both as a class and as an object method) and passes all the
# arguments on to init().
sub new {
    my $class = shift;
    my $self = bless {}, ref($class)||$class;
    $self->init(@_);
    return $self;
}

# Initializes a freshly created docset object.
#
#   $config_file - path to the docset's config file
#   $parent_o    - the parent docset object, if this is a nested docset
#   $src_rel_dir - passed to merge_config() when there is a parent
sub init {
    my ($self, $config_file, $parent_o, $src_rel_dir) = @_;

    $self->read_config($config_file, $parent_o);

    # are we inside a super docset?
    if ($parent_o and ref($parent_o)) {
        $self->{parent_o} = $parent_o;
        $self->merge_config($src_rel_dir);
    }

    # we assume that the docset was not modified since the last run.
    # if at least one source doc/config file was modified, the docset
    # is considered modified as well and should be rebuilt. It's the
    # responsibility of the modified object to set its parent docset
    # status to 'modified'.
    $self->modified(0);

    # currently a given docset is considered to be in the 'modified' state,
    # if any of these conditions is true:
    #
    # 1. the included docset is 'modified':
    # 2. the included chapter is 'modified':
    # 3. the included 'copy as-is' files are 'modified':
    # 4. config.cfg is newer than corresponding index.html
    # 5. the cache file is missing

}

# First pass: scans all the nodes of this docset (sub-docsets,
# chapters, links and the sitemap), stores their meta data in the
# per-mode cache file, remembers the objects that have to be rendered
# and works out whether the docset is in the 'modified' state.
sub scan {
    my ($self) = @_;

    my $src_root = $self->get_dir('src_root');
    my $purge = DocSet::RunTime::get_opts('rebuild_all') ? 1 : 0;
    my $update = 1; # see DocSet::Cache::new
    # each output mode needs its own cache, because of the destination
    # links which are different
    my $mode = $self->get('tmpl_mode');
    my $cache_file = "$src_root/cache.$mode.dat";

    # - create the new cache object for updates
    # - rebuild_all forces the existing cache's purge
    my $cache = DocSet::Cache->new($cache_file, $update, $purge);
    $self->cache($cache); # add to the docset object

    # a complete rebuild of the docset is done when:
    # - we are told to do so:
    # - if the cache file doesn't exist
    # - or we failed to retrieve an existing cache
    if (DocSet::RunTime::get_opts('rebuild_all') ||
        $cache->read_error || !$cache->can_read) {
        $self->modified(1);
        $self->rebuild(1);
    }

    # cache the index node meta data
    $cache->index_node(id       => $self->get('id'),
                       stitle   => $self->get('stitle'),
                       title    => $self->get('title'),
                       abstract => $self->get('abstract'),
                       extra    => $self->get('extra'),
                      );

    # croaks if the docset id is duplicated
    $self->check_duplicated_docset_ids();

    # cache the location of the parent node cache
    if (my $parent_o = $self->get('parent_o')) {
        my $parent_src_root = $parent_o->get_dir('src_root');
        (my $rel2parent_src_root = $src_root) =~ s|\Q$parent_src_root||;
        # one '..' for each '/' left in the path relative to the parent
        my $rel_dir = join '/', ("..") x ($rel2parent_src_root =~ tr|/|/|);
        my $parent_cache_path = "$parent_src_root/cache.$mode.dat";
        $cache->parent_node($parent_cache_path,
                            $self->get('id'),
                            $rel_dir);
        $self->set_dir(rel_parent_root => $rel_dir);
    }
    else {
        $self->set_dir(rel_parent_root => '.');
    }

    ###
    # scan the nodes of the current level and cache the meta and other
    # data

    my $hidden = 0;
    my @nodes_by_type = @{ $self->nodes_by_type };
    while (@nodes_by_type) {
        my ($type, $data) = splice @nodes_by_type, 0, 2;
        if ($type eq 'docsets') {
            my $docset = $self->docset_scan_n_cache($data, $hidden);
            if (defined $docset and ref $docset) {
                # modified sub-docset --> modified docset
                $self->modified(1) if $docset->modified();
                $self->object_store($docset);
            }

        } elsif ($type eq 'chapters') {
            my $chapter = $self->chapter_scan_n_cache($data, $hidden);
            if (defined $chapter and ref $chapter) {
                # modified chapter --> modified docset
                $self->modified(1);
                $self->object_store($chapter)
            }
        } elsif ($type eq 'links') {
            $self->link_scan_n_cache($data, $hidden);
            # we don't need to process links
        } elsif ($type eq 'sitemap') {
            $self->sitemap_cache($data, $hidden);
            # nothing is stored here, sitemap_cache() only caches the
            # meta data and raises the sitemap flag
        } else {
            # nothing
        }

    }

    # the same but for the hidden objects
    $hidden = 1;
    my @hidden_nodes_by_type = @{ $self->hidden_nodes_by_type };
    while (@hidden_nodes_by_type) {
        my ($type, $data) = splice @hidden_nodes_by_type, 0, 2;
        if ($type eq 'docsets') {
            my $docset = $self->docset_scan_n_cache($data, $hidden);
            if (defined $docset and ref $docset) {
                # a modified hidden sub-docset must mark this docset
                # as modified too: render() returns early for an
                # unmodified docset and would never reach the stored
                # sub-docset otherwise
                $self->modified(1) if $docset->modified();
                $self->object_store($docset);
            }

        } elsif ($type eq 'chapters') {
            my $chapter = $self->chapter_scan_n_cache($data, $hidden);
            if (defined $chapter and ref $chapter) {
                # modified chapter --> modified docset
                $self->modified(1);
                $self->object_store($chapter)
            }

        } else {
            # nothing
        }
    }

    $cache->node_groups($self->node_groups);

    # compare whether the config file is newer than the corresponding
    # index.html
    my $dst_root    = $self->get_dir('dst_root');
    my $config_file = $self->{config_file};

    my $dst_index = "$dst_root/index.html";
    my ($config_updated) = $self->should_update($config_file, $dst_index);
    $self->modified(1) if $config_updated;

    # if @body{qw(top bot)} component files exist, check whether they
    # are newer than the target index.html file
    if (my $body = $self->get('body')) {
        for my $sec (qw(top bot)) {
            my $src_file = $body->{$sec};
            next unless $src_file;
            $src_file = catfile $src_root, $src_file;
            my ($body_updated) = $self->should_update($src_file, $dst_index);
            $self->modified(1) if $body_updated;
        }
    }

    # sync the cache
    $cache->write;

    # copy non-pod files like images and stylesheets
    #
    # META: though this belongs to the 'render' part, we run it here,
    # since we need to know after the scan() whether the docset is
    # modified. a cleaner, logic-wise, solution would be only to check
    # modification times on files that may need to be copied as-is,
    # but to postpone the copying, if any, only to the render part of
    # the logic. We could also remove here all the files that don't
    # need to be copied, since they didn't change.
    $self->scan_copy_the_rest;

}


# Creates and scans the nested docset living in $src_rel_dir (relative
# to this docset's src_root), then caches its meta data and the path
# to its cache file in this docset's cache. Returns the child docset
# object.
sub docset_scan_n_cache {
    my ($self, $src_rel_dir, $hidden) = @_;

    my $src_root = $self->get_dir('src_root');
    my $config_file = "$src_root/$src_rel_dir/config.cfg";
    my $docset = $self->new($config_file, $self, $src_rel_dir);
    $docset->scan;

    # cache the child docset's meta data
    my $id = $docset->get('id');
    $self->cache->add($id);
    my $meta = {
        stitle   => $docset->get('stitle'),
        title    => $docset->get('title'),
        link     => "$src_rel_dir/index.html",
        abstract => $docset->get('abstract'),
        rel_path => $src_rel_dir,
    };
    $self->cache->set($id, 'meta', $meta, $hidden);

    # add the location of the cache file, so later we can traverse the
    # nodes, by just reading the cache files, which are linked to each
    # other both ways.
    my $mode = $self->get('tmpl_mode');
    my $child_cache_path = "$src_root/$src_rel_dir/cache.$mode.dat";
    $self->cache->set($id, 'child_cache_path', $child_cache_path);

    note "\n"; # mark the end of scan

    return $docset;
}



# Caches the meta data of a link node. $link is a hash reference; it
# is copied, its 'id' entry is removed from the copy and used as the
# cache key. A missing title/stitle is filled in from the other one.
sub link_scan_n_cache {
    my ($self, $link, $hidden) = @_;
    my %meta = %$link; # make a copy
    my $id = delete $meta{id};
    $meta{title}  = $meta{stitle} unless exists $meta{title};
    $meta{stitle} = $meta{title}  unless exists $meta{stitle};
    $self->cache->add($id);
    $self->cache->set($id, 'meta', \%meta, $hidden);
}

# Caches the meta data of the sitemap node and stores it in
# $self->{sitemap}. Unlike link_scan_n_cache() the 'id' entry is kept
# in the cached meta data.
sub sitemap_cache {
    my ($self, $link, $hidden) = @_;
    my %meta = %$link; # make a copy
    my $id = $meta{id};
    $meta{title}  = $meta{stitle} unless exists $meta{title};
    $meta{stitle} = $meta{title}  unless exists $meta{stitle};
    $self->cache->add($id);
    $self->cache->set($id, 'meta', \%meta, $hidden);

    # we will need to raise this flag to render the doc
    # XXX: consider creating a Sitemap class, so we can handle this
    # generically as chapters and docsets
    $self->{sitemap} = \%meta;
    # see Config::sitemap method
}

# Registers the chapter $src_file (a path relative to src_root) in the
# cache and, if its destination file is missing or out of date (or a
# rebuild is forced), creates the converter object for it, scans it
# and caches its meta and toc data.
#
# Returns the chapter object, or undef if the chapter doesn't need to
# be rebuilt. Dies if the file extension or its mime type cannot be
# figured out.
sub chapter_scan_n_cache {
    my ($self, $src_file, $hidden) = @_;

    my $id = $src_file;
    $self->cache->add($id);

    my $trg_ext = $self->trg_ext();

    my $src_root     = $self->get_dir('src_root');
    my $dst_root     = $self->get_dir('dst_root');
    my $abs_doc_root = $self->get_dir('abs_doc_root');
    my $src_path     = "$src_root/$src_file";

    my $src_ext = filename_ext($src_file)
        or die "cannot get an extension for $src_file [$src_path]";
    my $src_mime = $self->ext2mime($src_ext)
        or die "unknown extension: $src_ext [$src_path]";
    # \Q..\E: the extension is a literal string, not a pattern
    (my $basename = $src_file) =~ s/\.\Q$src_ext\E$//;

    # destination paths
    my $rel_dst_path = "$basename.$trg_ext";
    $rel_dst_path =~ s|^\./||; # strip the leading './'
    my $dst_path = "$dst_root/$rel_dst_path";

    # one '..' for each directory level of the destination path
    my $rel_doc_root = $rel_dst_path =~ m|/|
        ? join('/', ("..") x ($rel_dst_path =~ tr|/|/|))
        : '.';

    # push to the list of final chapter paths e.g. used by PS/PDF
    # build, which needs all the non-hidden chapters
    $self->trg_chapters($rel_dst_path) unless $hidden;

    ### to rebuild or not
    my ($should_update, $reason) = $self->should_update($src_path, $dst_path);
    if (!$should_update) {
        note "--- $src_file: skipping ($reason)";
        return undef;
    }

    ### init
    note "+++ $src_file: processing ($reason)";
    my $dst_mime = $self->get('dst_mime');
    my $conv_class = $self->conv_class($src_mime, $dst_mime);
    require_package($conv_class);

    my $chapter = $conv_class->new(
        docset         => $self,
        tmpl_mode      => $self->get('tmpl_mode'),
        tmpl_root      => $self->get_dir('tmpl'),
        src_root       => $src_root,
        dst_root       => $dst_root,
        src_uri        => $src_file,
        src_path       => $src_path,
        dst_path       => $dst_path,
        rel_dst_path   => $rel_dst_path,
        rel_doc_root   => $rel_doc_root,
        abs_doc_root   => $abs_doc_root,
        path_from_base => $self->get_dir('path_from_base'),
    );

    $chapter->scan();

    # cache the chapter's meta and toc data
    $self->cache->set($id, 'meta', $chapter->meta, $hidden);
    $self->cache->set($id, 'toc',  $chapter->toc,  $hidden);

    return $chapter;

}

####################
# Works out which of the 'copy as-is' files have to be copied to the
# destination tree. Nothing is copied here: the src => dst map of the
# files to copy is stored via files_to_copy() and the docset is marked
# as modified if at least one file needs to be copied.
sub scan_copy_the_rest {
    my ($self) = @_;

    my @scan_copy_files = @{ $self->files_to_scan_copy() };

    return unless @scan_copy_files;

    my %to_copy = ();

    my $src_root = $self->get_dir('src_root');
    my $dst_root = $self->get_dir('dst_root');
    note "+++ Scanning the copy as-is files. Comparing $src_root with $dst_root";
    foreach my $src_path (@scan_copy_files){
        my $dst_path = $src_path;
#        # some OSs's File::Find returns files with no dir prefix root
#        # (that's what ()* is for
#        $dst_path =~ s/(?:$src_root)*/$dst_root/;
        $dst_path =~ s/\Q$src_root/$dst_root/;

        # to rebuild or not to rebuild
        my ($should_update, $reason) =
            $self->should_update($src_path, $dst_path);
        if (!$should_update) {
            note "--- skipping cp $src_path $dst_path ($reason)";
            next;
        }
        $self->modified(1); # dirty state
        note "+++ processing $src_path => $dst_path ($reason)";
        $to_copy{$src_path} = $dst_path;
    }

    $self->files_to_copy(\%to_copy);
}

# Second pass: if the docset is modified, copies the as-is files,
# re-reads the cache written by scan(), renders every stored object
# and finally calls complete(). Dies if the cache cannot be read.
sub render {
    my ($self) = @_;

    # if the docset wasn't modified, don't render the docset
    return unless $self->modified();

    $self->copy_the_rest;

    my $src_root = $self->get_dir('src_root');

    # each output mode needs its own cache, because of the destination
    # links which are different
    my $mode = $self->get('tmpl_mode');
    my $path = "$src_root/cache.$mode.dat";
    my $cache = DocSet::Cache->new($path);

    die "Failed to read cache from $path: " . $cache->read_error
        if $cache->read_error;

    # render the objects no matter what kind are they
    for my $obj ($self->stored_objects) {
        $obj->render($cache);
    }

    $self->complete;

}

####################
# Copies the files collected by scan_copy_the_rest() to their
# destinations.
sub copy_the_rest {
    my ($self) = @_;

    my %copy_files = %{ $self->files_to_copy };

    return unless %copy_files;

    my $src_root = $self->get_dir('src_root');
    my $dst_root = $self->get_dir('dst_root');
    note "+++ Copying the non-processed files from $src_root to $dst_root";
    while (my ($src_path, $dst_path) = each %copy_files) {
        note "+++ cp $src_path $dst_path";
        copy_file($src_path, $dst_path);
    }
}


# an abstract method
sub complete {}

# die with the error, and supply the context in which the error has happened
sub error {
    my $self = shift;

    my @context;
    push @context, "config file: $self->{config_file}";

    die map({"!!! err: $_\n"} @_),
        "in context:\n", map({"\t$_\n"} @context);

}

# Decides whether $dst_path has to be (re)generated from $src_path.
#
# Returns a two element list: (should_update, reason). It must be
# called in list context, since in scalar context only the reason
# string (which is always true) comes back.
#
# Dies via error() if $src_path doesn't exist.
sub should_update {
    my ($self, $src_path, $dst_path) = @_;

    unless (-e $src_path) {
        $self->error("cannot find $src_path");
    }

    # to rebuild or not to rebuild
    # -M is the age of the file, so a smaller value means a newer
    # file: dst is up to date only if it exists and is newer than src
    my $not_modified =
        (-e $dst_path and -M $dst_path < -M $src_path) ? 1 : 0;

    my $reason = $not_modified ? 'not modified' : 'modified';
    if ($self->rebuild()) {
        return (1, "$reason / forced");
    }
    else {
        return (!$not_modified, $reason);
    }

}

1;
__END__

=head1 NAME

C<DocSet::DocSet> - An abstract docset generation class

=head1 SYNOPSIS

  use DocSet::DocSet::HTML ();
  my $docset = DocSet::DocSet::HTML->new($config_file);

  # must start from the abs root
  chdir $abs_root;

  # must be a relative path to be able to move the generated code from
  # location to location, without adjusting the links
  $docset->set_dir(abs_root => ".");
  $docset->scan;
  $docset->render;

  my ($should_update, $reason) =
      $docset->should_update($src_path, $dst_path);

=head1 DESCRIPTION

C<DocSet::DocSet> processes a docset, which can include other docsets,
documents and links. In the first pass it scans the documents and
other docsets linked to it and caches this information and the
objects for later use. In the second pass the stored objects are
rendered, and the docset is completed.

This class cannot be used on its own and has to be subclassed and
extended by sub-classes which know the specific input and output
formats of the documents that need to be processed. It handles only
the partial functionality which doesn't require format specific
knowledge.

=head2 METHODS

This class inherits from C<DocSet::Config> and you will find the
documentation of methods inherited from this class in its pod.

The following "public" methods are implemented in this super-class:

=over

=item * new

  $class->new($config_file, $parent_o, $src_rel_dir);

=item * init

  $self->init($config_file, $parent_o, $src_rel_dir);

=item * scan

  $self->scan();

Scans the docset for meta data and tocs of its items and caches this
information and the item objects.

=item * scan_copy_the_rest

  $self->scan_copy_the_rest();

Processes the files that should be copied as is without processing
(i.e. images, css files, etc). If any of the items has a timestamp
newer than the corresponding copy in the target destination, the whole
docset will be rebuilt.

Only files that were modified will be copied during the render phase.

=item * render

  $self->render();

Calls the render() method of each of the stored objects and creates an
index page linking all the items.

=item * copy_the_rest

  $self->copy_the_rest();

Copies as-is the files which aren't processed (i.e. images, css files,
etc.) and were modified.

=item * should_update

  my ($should_update, $reason) =
      $self->should_update($src_path, $dst_path);

Compares the timestamps/existence of the src and dst paths and returns
C<(true, reason)> if dst is missing or is not newer than src,
otherwise returns C<(false, reason)>.

If the rebuild mode is on (e.g. the I<rebuild_all> runtime option),
this always returns C<(true, reason)>.

Always call this method in list context: in scalar context only the
reason string is returned.

=back

=head2 ABSTRACT METHODS

The following methods should be implemented by the sub-classes.

=over

=item * parse

=item * retrieve_meta_data

=item * convert

=item * complete

  $self->complete();

Put here anything that should be run after all the items have been
rendered and all the meta info has been collected. i.e. generation of
the I<index> file, to link to all the links and the parent node if
such exists.

=back

=head1 AUTHORS

Stas Bekman E<lt>stas (at) stason.orgE<gt>

=cut