1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import numbers
10from struct import pack
11import io
12from collections import OrderedDict, defaultdict
13
14from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex,
15        encode_tbs, align_block, RECORD_SIZE, CNCX as CNCX_)
16from polyglot.builtins import iteritems, itervalues
17
18
class CNCX(CNCX_):  # {{{

    '''
    Collect all strings that must live in the CNCX records: every TOC item's
    title, and for periodicals also its class plus any author/description.
    '''

    def __init__(self, toc, is_periodical):
        strings = []
        for node in toc.iterdescendants(breadth_first=True):
            strings.append(node.title)
            if not is_periodical:
                continue
            strings.append(node.klass)
            for extra in (node.author, node.description):
                if extra:
                    strings.append(extra)
        CNCX_.__init__(self, strings)
# }}}
33
34
class TAGX:  # {{{

    '''
    Build TAGX blocks for MOBI index headers. A TAGX block maps tag numbers
    to the bitmasks used in index entry control bytes.
    '''

    # Tag number -> bit in the corresponding control byte. The secondary-index
    # tag (11) and the periodical extra tags (69-73) use a separate control
    # byte, so their bits overlap with the primary tags (1-5, 21-23).
    BITMASKS = {11: 0b1}
    BITMASKS.update({tag: (1 << bit) for bit, tag in enumerate([1, 2, 3, 4, 5, 21, 22, 23])})
    BITMASKS.update({tag: (1 << bit) for bit, tag in enumerate([69, 70, 71, 72, 73])})

    # Number of values each tag carries (default 1; tag 0 is the terminator)
    NUM_VALUES = defaultdict(lambda :1)
    NUM_VALUES[11] = 3
    NUM_VALUES[0] = 0

    def __init__(self):
        self.byts = bytearray()

    def add_tag(self, tag):
        '''Append one four-byte tag entry: tag, value count, bitmask, eof.'''
        entry = self.byts
        entry.append(tag)
        entry.append(self.NUM_VALUES[tag])
        if tag:
            entry.append(self.BITMASKS[tag])  # bitmask
            entry.append(0)                   # not the end-of-tags entry
        else:
            entry.append(0)                   # terminator has no bitmask
            entry.append(1)                   # eof marker

    def header(self, control_byte_count):
        '''12 byte TAGX header: ident, total table length, control byte count.'''
        return b'TAGX' + pack(b'>II', 12 + len(self.byts), control_byte_count)

    @property
    def periodical(self):
        '''
        TAGX block for the Primary index header of a periodical
        '''
        for tag in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72, 73, 0):
            self.add_tag(tag)
        return self.header(2) + bytes(self.byts)

    @property
    def secondary(self):
        '''
        TAGX block for the secondary index header of a periodical
        '''
        for tag in (11, 0):
            self.add_tag(tag)
        return self.header(1) + bytes(self.byts)

    @property
    def flat_book(self):
        '''
        TAGX block for the primary index header of a flat book
        '''
        for tag in (1, 2, 3, 4, 0):
            self.add_tag(tag)
        return self.header(1) + bytes(self.byts)


# }}}
92
93# Index Entries {{{
94
class IndexEntry:

    '''
    A single entry in the primary MOBI index. Attributes named ``*_offset``
    are offsets into the CNCX string records; tag semantics come from
    :attr:`TAG_VALUES` (the TAGX tag numbers).
    '''

    # Attribute name -> TAGX tag number
    TAG_VALUES = {
            'offset': 1,
            'size': 2,
            'label_offset': 3,
            'depth': 4,
            'class_offset': 5,
            'secondary': 11,
            'parent_index': 21,
            'first_child_index': 22,
            'last_child_index': 23,
            'image_index': 69,
            'desc_offset': 70,
            'author_offset': 71,

    }
    # Reverse map: TAGX tag number -> attribute name
    RTAG_MAP = {v:k for k, v in iteritems(TAG_VALUES)}  # noqa

    def __init__(self, offset, label_offset):
        # offset: byte offset of this entry in the text record stream
        # label_offset: CNCX offset of this entry's title string
        self.offset, self.label_offset = offset, label_offset
        self.depth, self.class_offset = 0, None
        # Books use a single control byte; periodicals override this to 2
        self.control_byte_count = 1

        self.length = 0
        self.index = 0

        # Tree structure (indices of related entries); None means absent and
        # the corresponding tag is not emitted
        self.parent_index = None
        self.first_child_index = None
        self.last_child_index = None

        # Periodical-only extras; None means absent
        self.image_index = None
        self.author_offset = None
        self.desc_offset = None

    def __repr__(self):
        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
                ' parent_index=%r)')%(self.offset, self.depth, self.length,
                        self.index, self.parent_index)

    @property
    def size(self):
        # size (TAGX tag 2) is an alias for length
        return self.length

    @size.setter
    def size(self, val):
        self.length = val

    @property
    def next_offset(self):
        # Offset of the first byte after this entry's extent
        return self.offset + self.length

    @property
    def tag_nums(self):
        '''Yield the TAGX tag numbers present in this entry, in order.'''
        # Tags 1-4 (offset, size, label_offset, depth) are always present
        yield from range(1, 5)
        for attr in ('class_offset', 'parent_index', 'first_child_index',
                'last_child_index'):
            if getattr(self, attr) is not None:
                yield self.TAG_VALUES[attr]

    @property
    def entry_type(self):
        # First control byte: bitmask over the tags present in this entry
        ans = 0
        for tag in self.tag_nums:
            ans |= TAGX.BITMASKS[tag]
        return ans

    def attr_for_tag(self, tag):
        return self.RTAG_MAP[tag]

    @property
    def bytestring(self):
        '''
        Serialize this entry: the index (hex string for ints, else a
        length-prefixed ASCII label), the control byte(s), then the
        encint-encoded value(s) for every present tag.
        '''
        buf = io.BytesIO()
        if isinstance(self.index, numbers.Integral):
            buf.write(encode_number_as_hex(self.index))
        else:
            # Non-integral indices (secondary index labels) are written as a
            # single length byte followed by the ASCII bytes
            raw = bytearray(self.index.encode('ascii'))
            raw.insert(0, len(raw))
            buf.write(bytes(raw))
        et = self.entry_type
        buf.write(bytes(bytearray([et])))

        if self.control_byte_count == 2:
            # Second control byte: flags for the periodical-only tags
            flags = 0
            for attr in ('image_index', 'desc_offset', 'author_offset'):
                val = getattr(self, attr)
                if val is not None:
                    tag = self.TAG_VALUES[attr]
                    bm = TAGX.BITMASKS[tag]
                    flags |= bm
            buf.write(bytes(bytearray([flags])))

        for tag in self.tag_nums:
            attr = self.attr_for_tag(tag)
            val = getattr(self, attr)
            if isinstance(val, numbers.Integral):
                val = [val]
            for x in val:
                buf.write(encint(x))

        if self.control_byte_count == 2:
            # Values for the periodical-only tags come after the primary tags
            for attr in ('image_index', 'desc_offset', 'author_offset'):
                val = getattr(self, attr)
                if val is not None:
                    buf.write(encint(val))

        ans = buf.getvalue()
        return ans
203
204
class PeriodicalIndexEntry(IndexEntry):

    '''
    Index entry for periodical content: carries a CNCX class offset, an
    explicit depth and uses two control bytes.
    '''

    def __init__(self, offset, label_offset, class_offset, depth):
        IndexEntry.__init__(self, offset, label_offset)
        self.control_byte_count = 2
        self.class_offset = class_offset
        self.depth = depth
212
213
class SecondaryIndexEntry(IndexEntry):

    '''
    Entry in the secondary index of a periodical. Only tag 11 is emitted.
    '''

    INDEX_MAP = {'author':73, 'caption':72, 'credit':71, 'description':70,
                'mastheadImage':69}

    def __init__(self, index):
        IndexEntry.__init__(self, 0, 0)
        self.index = index

        tag = self.INDEX_MAP[index]

        # The values for this index entry. The first value is 5 only on the
        # entry carrying the smallest tag; its meaning is unknown (it is not
        # the number of entries).
        lead = 5 if tag == min(itervalues(self.INDEX_MAP)) else 0
        self.secondary = [lead, 0, tag]

    @property
    def tag_nums(self):
        # Secondary entries carry only the secondary tag
        yield 11

    @property
    def entry_type(self):
        return 1

    @classmethod
    def entries(cls):
        '''Yield one entry per INDEX_MAP label, in descending tag order.'''
        by_tag = {tag: label for label, tag in iteritems(cls.INDEX_MAP)}
        for tag in sorted(by_tag, reverse=True):
            yield cls(by_tag[tag])
243
244# }}}
245
246
class TBS:  # {{{

    '''
    Take the list of index nodes starting/ending on a record and calculate the
    trailing byte sequence for the record.
    '''

    def __init__(self, data, is_periodical, first=False, section_map=None,
            after_first=False):
        '''
        :param data: Map with 'starts', 'ends', 'completes' and 'spans'
            describing the index nodes touching this record (plus 'offset'
            for periodicals), or falsy for a record with no index nodes.
        :param is_periodical: True when generating a periodical index
        :param first: True for the first record containing index nodes
        :param section_map: Map of section index -> section IndexEntry.
            Defaults to a fresh empty dict; the previous mutable default
            argument ({}) was shared across all calls, a classic Python
            pitfall (harmless here only because it was never mutated).
        :param after_first: For empty periodical records only: True when a
            record containing index nodes has already been seen.
        '''
        self.section_map = {} if section_map is None else section_map

        if is_periodical:
            # The starting bytes.
            # The value is zero which I think indicates the periodical
            # index entry. The values for the various flags seem to be
            # unused. If the 0b100 is present, it means that the record
            # deals with section 1 (or is the final record with section
            # transitions).
            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
                    flag_size=3)
            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
                    flag_size=3)
            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001:
                0}, flag_size=3)

            if not data:
                byts = b''
                if after_first:
                    # This can happen if a record contains only text between
                    # the periodical start and the first section
                    byts = self.type_011
                self.bytestring = byts
            else:
                # Group the touched nodes by depth (0: periodical, 1:
                # section, 2: article), each group sorted by offset
                depth_map = defaultdict(list)
                for x in ('starts', 'ends', 'completes'):
                    for idx in data[x]:
                        depth_map[idx.depth].append(idx)
                for l in itervalues(depth_map):
                    l.sort(key=lambda x:x.offset)
                self.periodical_tbs(data, first, depth_map)
        else:
            if not data:
                self.bytestring = b''
            else:
                self.book_tbs(data, first)

    def periodical_tbs(self, data, first, depth_map):
        '''Compute self.bytestring for one record of a periodical.'''
        buf = io.BytesIO()

        has_section_start = (depth_map[1] and
                set(depth_map[1]).intersection(set(data['starts'])))
        spanner = data['spans']
        parent_section_index = -1

        if depth_map[0]:
            # We have a terminal record

            # Find the first non periodical node
            first_node = None
            for nodes in (depth_map[1], depth_map[2]):
                for node in nodes:
                    if (first_node is None or (node.offset, node.depth) <
                            (first_node.offset, first_node.depth)):
                        first_node = node

            typ = (self.type_110 if has_section_start else self.type_010)

            # parent_section_index is needed for the last record
            if first_node is not None and first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth == 1 else first_node.parent_index)
            else:
                parent_section_index = max(iter(self.section_map))

        else:
            # Non terminal record

            if spanner is not None:
                # record is spanned by a single article
                parent_section_index = spanner.parent_index
                typ = (self.type_110 if parent_section_index == 1 else
                        self.type_010)
            elif not depth_map[1]:
                # has only article nodes, i.e. spanned by a section
                parent_section_index = depth_map[2][0].parent_index
                typ = (self.type_111 if parent_section_index == 1 else
                        self.type_010)
            else:
                # has section transitions
                if depth_map[2]:
                    parent_section_index = depth_map[2][0].parent_index
                else:
                    parent_section_index = depth_map[1][0].index
                typ = self.type_011

        buf.write(typ)

        if typ not in (self.type_110, self.type_111) and parent_section_index > 0:
            extra = {}
            # Write starting section information
            if spanner is None:
                num_articles = len([a for a in depth_map[1] if a.parent_index == parent_section_index])
                if not depth_map[1]:
                    extra = {0b0001: 0}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            buf.write(encode_tbs(parent_section_index, extra))

        if spanner is None:
            articles = depth_map[2]
            sections = {self.section_map[a.parent_index] for a in
                articles}
            sections = sorted(sections, key=lambda x:x.offset)
            section_map = {s:[a for a in articles if a.parent_index ==
                s.index] for s in sections}
            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = section_map[section]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)
                last_article_ends = (last_article in data['ends'] or
                        last_article in data['completes'])

                try:
                    next_sec = sections[i+1]
                except:
                    next_sec = None

                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if False and i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # I can't figure out exactly when Kindlegen decides to
                    # write this so I have disabled it for now.
                    extra[0b0001] = next_sec.offset - data['offset']

                buf.write(encode_tbs(first_article.index-section.index, extra))

                if next_sec is not None:
                    buf.write(encode_tbs(last_article.index-next_sec.index,
                        {0b1000: 0}))

                # If a section TOC starts and extends into the next record add
                # a trailing vwi. We detect this by TBS type==3, processing last
                # section present in the record, and the last article in that
                # section either ends or completes and doesn't finish
                # on the last byte of the record.
                elif (typ == self.type_011 and last_article_ends and
                      ((last_article.offset+last_article.size) % RECORD_SIZE > 0)
                     ):
                    buf.write(encode_tbs(last_article.index-section.index-1,
                        {0b1000: 0}))

        else:
            buf.write(encode_tbs(spanner.index - parent_section_index,
                {0b0001: 0}))

        self.bytestring = buf.getvalue()

    def book_tbs(self, data, first):
        '''Compute self.bytestring for one record of a flat book.'''
        spanner = data['spans']
        if spanner is not None:
            self.bytestring = encode_tbs(spanner.index, {0b010: 0, 0b001: 0},
                    flag_size=3)
        else:
            starts, completes, ends = (data['starts'], data['completes'],
                                        data['ends'])
            if (not completes and (
                (len(starts) == 1 and not ends) or (len(ends) == 1 and not
                    starts))):
                # Exactly one node starts or ends here, nothing else
                node = starts[0] if starts else ends[0]
                self.bytestring = encode_tbs(node.index, {0b010: 0}, flag_size=3)
            else:
                # Multiple nodes touch this record: encode the first index and
                # the total count
                nodes = []
                for x in (starts, completes, ends):
                    nodes.extend(x)
                nodes.sort(key=lambda x:x.index)
                self.bytestring = encode_tbs(nodes[0].index, {0b010:0,
                    0b100: len(nodes)}, flag_size=3)

# }}}
430
431
class Indexer:  # {{{

    '''
    Build the MOBI index records (INDX header, index record, CNCX records
    and, for periodicals, a secondary index) from the book's TOC, and
    calculate the trailing byte sequences for every text record.
    '''

    def __init__(self, serializer, number_of_text_records,
            size_of_last_text_record, masthead_offset, is_periodical,
            opts, oeb):
        '''
        :param serializer: Provides id_offsets (href -> byte offset in the
            serialized text) and the body start/end offsets
        :param number_of_text_records: Count of text records in the output
        :param size_of_last_text_record: Size in bytes of the final record
        :param masthead_offset: Image index of the masthead (required for
            periodicals)
        :param is_periodical: True when the input is a periodical
        :param opts: Conversion options
        :param oeb: The OEB book (provides toc and log)

        :raises ValueError: if a periodical lacks a masthead or the TOC
            yields no valid index entries
        '''
        self.serializer = serializer
        self.number_of_text_records = number_of_text_records
        # Total size of the serialized text, used nowhere below but kept as
        # part of this object's public state
        self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) +
                            size_of_last_text_record)
        self.masthead_offset = masthead_offset
        self.secondary_record_offset = None

        self.oeb = oeb
        self.log = oeb.log
        self.opts = opts

        self.is_periodical = is_periodical
        if self.is_periodical and self.masthead_offset is None:
            raise ValueError('Periodicals must have a masthead')

        self.log('Generating MOBI index for a %s'%('periodical' if
            self.is_periodical else 'book'))
        self.is_flat_periodical = False
        if self.is_periodical:
            periodical_node = next(iter(oeb.toc))
            sections = tuple(periodical_node)
            # A flat periodical has exactly one section
            self.is_flat_periodical = len(sections) == 1

        self.records = []

        if self.is_periodical:
            # Ensure all articles have an author and description before
            # creating the CNCX
            for node in oeb.toc.iterdescendants():
                if node.klass == 'article':
                    aut, desc = node.author, node.description
                    if not aut:
                        aut = _('Unknown')
                    if not desc:
                        desc = _('No details available')
                    node.author, node.description = aut, desc

        self.cncx = CNCX(oeb.toc, self.is_periodical)

        if self.is_periodical:
            self.indices = self.create_periodical_index()
        else:
            self.indices = self.create_book_index()

        if not self.indices:
            raise ValueError('No valid entries in TOC, cannot generate index')

        # Record layout: header, index record, CNCX records, then (for
        # periodicals) the secondary header and secondary index record
        self.records.append(self.create_index_record())
        self.records.insert(0, self.create_header())
        self.records.extend(self.cncx.records)

        if is_periodical:
            self.secondary_record_offset = len(self.records)
            self.records.append(self.create_header(secondary=True))
            self.records.append(self.create_index_record(secondary=True))

        self.calculate_trailing_byte_sequences()

    def create_index_record(self, secondary=False):  # {{{
        '''
        Serialize the index entries (primary or secondary) into a single
        INDX record: header, entry data, then an IDXT block of entry offsets.

        :raises ValueError: if the record would exceed 64KB
        '''
        header_length = 192
        buf = io.BytesIO()
        indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices

        # Write index entries
        offsets = []
        for i in indices:
            offsets.append(buf.tell())
            buf.write(i.bytestring)

        index_block = align_block(buf.getvalue())

        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        buf.seek(0), buf.truncate(0)
        for offset in offsets:
            # Offsets are relative to the start of the record (past the header)
            buf.write(pack(b'>H', header_length+offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block

        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0'*4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type? Or index record number?
        buf.write(b'\0'*4)  # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff'*8)
        # Unknown
        buf.write(b'\0'*156)

        header += buf.getvalue()

        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC'%len(offsets))
        return ans
    # }}}

    def create_header(self, secondary=False):  # {{{
        '''
        Build the INDX header record containing the TAGX block, the index of
        the last NCX entry, the entry count and a trailing IDXT block.
        '''
        buf = io.BytesIO()
        if secondary:
            tagx_block = TAGX().secondary
        else:
            tagx_block = (TAGX().periodical if self.is_periodical else
                                TAGX().flat_book)
        header_length = 192

        # Ident 0 - 4
        buf.write(b'INDX')

        # Header length 4 - 8
        buf.write(pack(b'>I', header_length))

        # Unknown 8-16
        buf.write(b'\0'*8)

        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))

        # IDXT offset 20-24
        buf.write(pack(b'>I', 0))  # Filled in later

        # Number of index records 24-28
        buf.write(pack(b'>I', 1 if secondary else len(self.records)))

        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001))  # utf-8

        # Unknown 32-36
        buf.write(b'\xff'*4)

        # Number of index entries 36-40
        indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices
        buf.write(pack(b'>I', len(indices)))

        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))

        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))

        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))

        # Number of CNCX records 52-56
        buf.write(pack(b'>I', 0 if secondary else len(self.cncx.records)))

        # Unknown 56-180
        buf.write(b'\0'*124)

        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))

        # Unknown 184-192
        buf.write(b'\0'*8)

        # TAGX block
        buf.write(tagx_block)

        num = len(indices)

        # The index of the last entry in the NCX
        idx = indices[-1].index
        if isinstance(idx, numbers.Integral):
            idx = encode_number_as_hex(idx)
        else:
            # String indices are written length-prefixed in ASCII
            idx = idx.encode('ascii')
            idx = (bytes(bytearray([len(idx)]))) + idx
        buf.write(idx)

        # The number of entries in the NCX
        buf.write(pack(b'>H', num))

        # Padding to a four byte boundary
        pad = (4 - (buf.tell()%4))%4
        if pad:
            buf.write(b'\0'*pad)

        idxt_offset = buf.tell()

        buf.write(b'IDXT')
        buf.write(pack(b'>H', header_length + len(tagx_block)))
        buf.write(b'\0')
        # Go back and fill in the IDXT offset at bytes 20-24
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))

        return align_block(buf.getvalue())
    # }}}

    def create_book_index(self):  # {{{
        '''
        Build a flat list of IndexEntry objects for a book, one per unique
        TOC item offset, sorted by offset, with lengths and indices set.
        '''
        indices = []
        seen = set()
        id_offsets = self.serializer.id_offsets

        # Flatten toc so that chapter to chapter jumps work with all sub
        # chapter levels as well
        for node in self.oeb.toc.iterdescendants():
            try:
                offset = id_offsets[node.href]
                label = self.cncx[node.title]
            except:
                self.log.warn('TOC item %s [%s] not found in document'%(
                    node.title, node.href))
                continue

            if offset in seen:
                continue
            seen.add(offset)

            indices.append(IndexEntry(offset, label))

        indices.sort(key=lambda x:x.offset)

        # Set lengths
        for i, index in enumerate(indices):
            try:
                next_offset = indices[i+1].offset
            except:
                # Last entry extends to the end of the body
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset

        # Remove empty indices
        indices = [x for x in indices if x.length > 0]

        # Reset lengths in case any were removed
        for i, index in enumerate(indices):
            try:
                next_offset = indices[i+1].offset
            except:
                next_offset = self.serializer.body_end_offset
            index.length = next_offset - index.offset

        # Set index values
        for index, x in enumerate(indices):
            x.index = index

        return indices

    # }}}

    def create_periodical_index(self):  # {{{
        '''
        Build the index entries for a periodical: one entry for the
        periodical itself, then its sections, then all articles. Lengths,
        parent/child indices and section layout are validated before return.

        :raises ValueError: if the section/article layout is inconsistent
        '''
        periodical_node = next(iter(self.oeb.toc))
        periodical_node_offset = self.serializer.body_start_offset
        periodical_node_size = (self.serializer.body_end_offset -
                periodical_node_offset)

        normalized_sections = []

        id_offsets = self.serializer.id_offsets

        # Depth 0 entry spanning the whole body
        periodical = PeriodicalIndexEntry(periodical_node_offset,
                self.cncx[periodical_node.title],
                self.cncx[periodical_node.klass], 0)
        periodical.length = periodical_node_size
        periodical.first_child_index = 1
        periodical.image_index = self.masthead_offset

        seen_sec_offsets = set()
        seen_art_offsets = set()

        for sec in periodical_node:
            normalized_articles = []
            try:
                offset = id_offsets[sec.href]
                label = self.cncx[sec.title]
                klass = self.cncx[sec.klass]
            except:
                # Section not present in the serialized document; skip it
                continue
            if offset in seen_sec_offsets:
                continue

            seen_sec_offsets.add(offset)
            section = PeriodicalIndexEntry(offset, label, klass, 1)
            section.parent_index = 0

            for art in sec:
                try:
                    offset = id_offsets[art.href]
                    label = self.cncx[art.title]
                    klass = self.cncx[art.klass]
                except:
                    continue
                if offset in seen_art_offsets:
                    continue
                seen_art_offsets.add(offset)
                article = PeriodicalIndexEntry(offset, label, klass, 2)
                normalized_articles.append(article)
                article.author_offset = self.cncx[art.author]
                article.desc_offset = self.cncx[art.description]
                if getattr(art, 'toc_thumbnail', None) is not None:
                    try:
                        ii = self.serializer.images[art.toc_thumbnail] - 1
                        if ii > -1:
                            article.image_index = ii
                    except KeyError:
                        pass  # Image not found in serializer

            if normalized_articles:
                normalized_articles.sort(key=lambda x:x.offset)
                normalized_sections.append((section, normalized_articles))

        normalized_sections.sort(key=lambda x:x[0].offset)

        # Set lengths
        for s, x in enumerate(normalized_sections):
            sec, normalized_articles = x
            try:
                sec.length = normalized_sections[s+1][0].offset - sec.offset
            except:
                sec.length = self.serializer.body_end_offset - sec.offset
            for i, art in enumerate(normalized_articles):
                try:
                    art.length = normalized_articles[i+1].offset - art.offset
                except:
                    # Last article extends to the end of its section
                    art.length = sec.offset + sec.length - art.offset

        # Filter out zero-length articles
        for i, x in list(enumerate(normalized_sections)):
            sec, normalized_articles = x
            normalized_articles = list(filter(lambda x: x.length > 0,
                normalized_articles))
            normalized_sections[i] = (sec, normalized_articles)

        # Drop zero-length or now-empty sections
        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
            normalized_sections))

        # Set indices: sections are numbered first, then all articles
        i = 0
        for sec, articles in normalized_sections:
            i += 1
            sec.index = i
            sec.parent_index = 0

        for sec, articles in normalized_sections:
            for art in articles:
                i += 1
                art.index = i

                art.parent_index = sec.index

        for sec, normalized_articles in normalized_sections:
            sec.first_child_index = normalized_articles[0].index
            sec.last_child_index = normalized_articles[-1].index

        # Set lengths again to close up any gaps left by filtering
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_offset = normalized_sections[s+1][0].offset
            except:
                next_offset = self.serializer.body_end_offset
            sec.length = next_offset - sec.offset

            for a, art in enumerate(articles):
                try:
                    next_offset = articles[a+1].offset
                except:
                    next_offset = sec.next_offset
                art.length = next_offset - art.offset

        # Sanity check: sections and articles must tile their parents exactly
        for s, x in enumerate(normalized_sections):
            sec, articles = x
            try:
                next_sec = normalized_sections[s+1][0]
            except:
                if (sec.length == 0 or sec.next_offset !=
                        self.serializer.body_end_offset):
                    raise ValueError('Invalid section layout')
            else:
                if next_sec.offset != sec.next_offset or sec.length == 0:
                    raise ValueError('Invalid section layout')
            for a, art in enumerate(articles):
                try:
                    next_art = articles[a+1]
                except:
                    if (art.length == 0 or art.next_offset !=
                            sec.next_offset):
                        raise ValueError('Invalid article layout')
                else:
                    if art.length == 0 or art.next_offset != next_art.offset:
                        raise ValueError('Invalid article layout')

        # Flatten: periodical, all sections, then all articles
        indices = [periodical]
        for sec, articles in normalized_sections:
            indices.append(sec)
            periodical.last_child_index = sec.index

        for sec, articles in normalized_sections:
            for a in articles:
                indices.append(a)

        return indices
    # }}}

    # TBS {{{
    def calculate_trailing_byte_sequences(self):
        '''
        Populate self.tbs_map: record number (1-based) -> TBS object, by
        classifying each index node as starting, ending, completing or
        spanning each text record.
        '''
        self.tbs_map = {}
        found_node = False
        sections = [i for i in self.indices if i.depth == 1]
        section_map = OrderedDict((i.index, i) for i in
                sorted(sections, key=lambda x:x.offset))

        deepest = max(i.depth for i in self.indices)

        for i in range(self.number_of_text_records):
            offset = i * RECORD_SIZE
            next_offset = offset + RECORD_SIZE
            data = {'ends':[], 'completes':[], 'starts':[],
                    'spans':None, 'offset':offset, 'record_number':i+1}

            for index in self.indices:

                if index.offset >= next_offset:
                    # Node starts after current record
                    if index.depth == deepest:
                        # Indices are offset-sorted within the deepest level,
                        # so no later node can touch this record either
                        break
                    else:
                        continue
                if index.next_offset <= offset:
                    # Node ends before current record
                    continue
                if index.offset >= offset:
                    # Node starts in current record
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['completes'].append(index)
                    else:
                        data['starts'].append(index)
                else:
                    # Node starts before current records
                    if index.next_offset <= next_offset:
                        # Node ends in current record
                        data['ends'].append(index)
                    elif index.depth == deepest:
                        data['spans'] = index

            if (data['ends'] or data['completes'] or data['starts'] or
                    data['spans'] is not None):
                self.tbs_map[i+1] = TBS(data, self.is_periodical, first=not
                        found_node, section_map=section_map)
                found_node = True
            else:
                self.tbs_map[i+1] = TBS({}, self.is_periodical, first=False,
                        after_first=found_node, section_map=section_map)

    def get_trailing_byte_sequence(self, num):
        '''Return the trailing byte sequence for text record num (1-based).'''
        return self.tbs_map[num].bytestring
    # }}}

# }}}
893