1#Copyright ReportLab Europe Ltd. 2000-2017
2#see license.txt for license details
3__version__ = '$Id$'
4__doc__="""TrueType font support
5
6This defines classes to represent TrueType fonts.  They know how to calculate
7their own width and how to write themselves into PDF files.  They support
8subsetting and embedding and can represent all 16-bit Unicode characters.
9
10Note on dynamic fonts
11---------------------
12
13Usually a Font in ReportLab corresponds to a fixed set of PDF objects (Font,
14FontDescriptor, Encoding).  But with dynamic font subsetting a single TTFont
15will result in a number of Font/FontDescriptor/Encoding object sets, and the
16contents of those will depend on the actual characters used for printing.
17
18To support dynamic font subsetting a concept of "dynamic font" was introduced.
19Dynamic Fonts have a _dynamicFont attribute set to 1.
20
21Dynamic fonts have the following additional functions::
22
23    def splitString(self, text, doc):
24        '''Splits text into a number of chunks, each of which belongs to a
25        single subset.  Returns a list of tuples (subset, string).  Use
26        subset numbers with getSubsetInternalName.  Doc is used to identify
27        a document so that different documents may have different dynamically
28        constructed subsets.'''
29
30    def getSubsetInternalName(self, subset, doc):
31        '''Returns the name of a PDF Font object corresponding to a given
32        subset of this dynamic font.  Use this function instead of
33        PDFDocument.getInternalFontName.'''
34
35You must never call PDFDocument.getInternalFontName for dynamic fonts.
36
37If you have a traditional static font, mapping to PDF text output operators
38is simple::
39
40   '%s 14 Tf (%s) Tj' % (getInternalFontName(psfontname), text)
41
42If you have a dynamic font, use this instead::
43
44   for subset, chunk in font.splitString(text, doc):
45       '%s 14 Tf (%s) Tj' % (font.getSubsetInternalName(subset, doc), chunk)
46
(Tf is a font setting operator and Tj is a text output operator.  You should
48also escape invalid characters in Tj argument, see TextObject._formatText.
49Oh, and that 14 up there is font size.)
50
51Canvas and TextObject have special support for dynamic fonts.
52"""
53
54from struct import pack, unpack, error as structError
55from reportlab.lib.utils import getBytesIO, isPy3, bytestr, isUnicode, char2int, bytesT, isStr, isBytes
56from reportlab.pdfbase import pdfmetrics, pdfdoc
57from reportlab import rl_config, xrange, ascii
58from reportlab.lib.rl_accel import hex32, add32, calcChecksum, instanceStringWidthTTF
59from collections import namedtuple
60import os, time
61
class TTFError(pdfdoc.PDFError):
    """Exception raised by this module for TrueType font problems, such as
    a file that cannot be opened or font data that cannot be parsed."""
65
if isPy3:
    def SUBSETN(n,table=bytes.maketrans(b'0123456789',b'ABCDEFGIJK')):
        """Format n as a zero padded decimal number of at least six digits
        and return it as bytes with every digit replaced through table."""
        digits = '%6.6d' % n
        return bytes(digits,'ASCII').translate(table)
else:
    import string
    def SUBSETN(n,table=string.maketrans(b'0123456789',b'ABCDEFGIJK'),translate=string.translate):
        """Format n as a zero padded decimal number of at least six digits
        and return it with every digit replaced through table."""
        digits = '%6.6d' % n
        return translate(digits,table)
73#
74# Helpers
75#
def makeToUnicodeCMap(fontname, subset):
    """Creates a ToUnicode CMap for a given subset.  See Adobe
    _PDF_Reference (ISBN 0-201-75839-3) for more information.

    fontname is used as the CMap name and as the Registry and Ordering
    entries of CIDSystemInfo.  subset is a sequence of Unicode code points;
    the position of a code point in the sequence is the one byte character
    code that gets mapped to it.

    Returns the CMap program as a single string with newline separated lines.

    Code points up to 0xFFFF are written as four hex digits.  Larger code
    points are written as a UTF-16BE surrogate pair (eight hex digits),
    because the destination of a bfchar entry is a UTF-16BE string and a
    bare five or six digit hex value is not a valid one.
    """
    def utf16beHex(codePoint):
        "hex digits of the UTF-16BE encoding of a single code point"
        if codePoint > 0xFFFF:
            codePoint -= 0x10000
            return "%04X%04X" % (0xD800 | (codePoint >> 10), 0xDC00 | (codePoint & 0x3FF))
        return "%04X" % codePoint

    count = len(subset)
    cmap = [
        "/CIDInit /ProcSet findresource begin",
        "12 dict begin",
        "begincmap",
        "/CIDSystemInfo",
        "<< /Registry (%s)" % fontname,
        "/Ordering (%s)" % fontname,
        "/Supplement 0",
        ">> def",
        "/CMapName /%s def" % fontname,
        "/CMapType 2 def",
        "1 begincodespacerange",
        # clamp so that an empty subset does not produce the token <-1>
        "<00> <%02X>" % max(count - 1, 0),
        "endcodespacerange",
        "%d beginbfchar" % count
        ] + ["<%02X> <%s>" % (i,utf16beHex(v)) for i,v in enumerate(subset)] + [
        "endbfchar",
        "endcmap",
        "CMapName currentdict /CMap defineresource pop",
        "end",
        "end"
        ]
    return '\n'.join(cmap)
102
def splice(stream, offset, value):
    """Return a new sequence made of stream up to offset, then value, then
    the part of stream that starts len(value) items after offset.  The
    stream passed in is left untouched."""
    resumeAt = offset + len(value)
    head = stream[:offset]
    tail = stream[resumeAt:]
    return head + value + tail
107
def _set_ushort(stream, offset, value):
    """Return a copy of stream in which the two bytes starting at offset
    are replaced by value packed as a big-endian unsigned short.  The
    stream passed in is left untouched."""
    packed = pack(">H", value)
    return splice(stream, offset, packed)
112#
113# TrueType font handling
114#
115
# Single bit flag masks, one per bit from bit 0 to bit 12.
# NOTE(review): presumably the component flags of TrueType composite
# glyphs -- confirm against the code that uses these names.
GF_ARG_1_AND_2_ARE_WORDS        = 0x0001
GF_ARGS_ARE_XY_VALUES           = 0x0002
GF_ROUND_XY_TO_GRID             = 0x0004
GF_WE_HAVE_A_SCALE              = 0x0008
GF_RESERVED                     = 0x0010
GF_MORE_COMPONENTS              = 0x0020
GF_WE_HAVE_AN_X_AND_Y_SCALE     = 0x0040
GF_WE_HAVE_A_TWO_BY_TWO         = 0x0080
GF_WE_HAVE_INSTRUCTIONS         = 0x0100
GF_USE_MY_METRICS               = 0x0200
GF_OVERLAP_COMPOUND             = 0x0400
GF_SCALED_COMPONENT_OFFSET      = 0x0800
GF_UNSCALED_COMPONENT_OFFSET    = 0x1000
129
130
# cache of _ttf_dirs results, keyed by the tuple of roots that was passed in
_cached_ttf_dirs={}
def _ttf_dirs(*roots):
    """Return the list of real paths of all directories found at or below
    the given roots.

    Every root is walked with os.walk following symbolic links.  Each
    directory is reported once, by its os.path.realpath, in the order in
    which it is first met.  A root that does not exist contributes nothing.

    The result is cached per tuple of roots and the cached list object
    itself is returned, so callers should not modify it.
    """
    R = _cached_ttf_dirs.get(roots,None)
    if R is None:
        join = os.path.join
        realpath = os.path.realpath
        R = []
        aR = R.append
        # the set mirrors R so that the duplicate test does not have to scan
        # the whole list for every directory
        seen = set()
        saw = seen.add
        for root in roots:
            for r, d, f in os.walk(root,followlinks=True):
                s = realpath(r)
                if s not in seen:
                    saw(s)
                    aR(s)
                for s in d:
                    s = realpath(join(r,s))
                    if s not in seen:
                        saw(s)
                        aR(s)
        _cached_ttf_dirs[roots] = R
    return R
148
def TTFOpenFile(fn):
    '''Open a TTF file for binary reading and return (filename,file).

    fn is tried as given first.  If that fails with IOError and fn is not an
    absolute path, fn is looked for in every directory found under the
    entries of rl_config.TTFSearchPath and the first existing candidate is
    opened; the returned filename is then the path of that candidate.

    Raises TTFError when no file could be opened.
    '''
    from reportlab.lib.utils import rl_isfile, open_for_read
    import os
    try:
        return fn, open_for_read(fn,'rb')
    except IOError:
        if not os.path.isabs(fn):
            for searchDir in _ttf_dirs(*rl_config.TTFSearchPath):
                candidate = os.path.join(searchDir,fn)
                if rl_isfile(candidate):
                    return candidate, open_for_read(candidate,'rb')
        raise TTFError('Can\'t open file "%s"' % fn)
166
class TTFontParser:
    """Basic TTF file parser

    The complete file is kept in memory as _ttf_data together with a read
    position _pos.  The read_* methods read at the read position and advance
    it; the get_* methods read at an explicit offset and leave the read
    position alone.  All multi byte values are unpacked as big-endian.
    """
    # Header versions accepted by readHeader.  The last entry (the bytes
    # 'ttcf') is the one that readHeader reports as a collection.
    ttfVersions = (0x00010000,0x74727565,0x74746366)
    # Collection header versions accepted by readTTCHeader.
    ttcVersions = (0x00010000,0x00020000)
    fileKind='TTF'

    def __init__(self, file, validate=0,subfontIndex=0):
        """Loads and parses a TrueType font file.  file can be a filename or a
        file object.  If validate is set to a false value, skips checksum
        validation.  This can save time, especially if the font is large.

        subfontIndex selects the subfont when the file is a collection; it is
        ignored otherwise.
        """
        self.validate = validate
        self.readFile(file)
        isCollection = self.readHeader()
        if isCollection:
            self.readTTCHeader()
            self.getSubfont(subfontIndex)
        else:
            if self.validate: self.checksumFile()
            self.readTableDirectory()
            self.subfontNameX = b''

    def readTTCHeader(self):
        """Reads the collection header at the read position.

        Sets ttcVersion, numSubfonts and subfontOffsets, switches fileKind to
        'TTC' and removes the collection tag from this instance's ttfVersions
        so that the subfont headers read later cannot be collections again.

        Raises TTFError for an unknown collection version.
        """
        self.ttcVersion = self.read_ulong()
        self.fileKind = 'TTC'
        self.ttfVersions = self.ttfVersions[:-1]
        if self.ttcVersion not in self.ttcVersions:
            raise TTFError('"%s" is not a %s file: can\'t read version 0x%8.8x' %(self.filename,self.fileKind,self.ttcVersion))
        self.numSubfonts = self.read_ulong()
        self.subfontOffsets = []
        a = self.subfontOffsets.append
        for i in xrange(self.numSubfonts):
            a(self.read_ulong())

    def getSubfont(self,subfontIndex):
        """Makes the subfont with the given index the current font by
        reading its header and table directory.

        Raises TTFError when the file is not a collection or when
        subfontIndex is not a valid index into subfontOffsets.
        """
        if self.fileKind!='TTC':
            # the original format string had one placeholder for two values,
            # so a TypeError was raised here instead of the TTFError
            raise TTFError('"%s" is not a TTC file (it is a %s file): cannot use this method' % (self.filename,self.fileKind))
        try:
            pos = self.subfontOffsets[subfontIndex]
        except IndexError:
            raise TTFError('TTC file "%s": bad subfontIndex %s not in [0,%d]' % (self.filename,subfontIndex,self.numSubfonts-1))
        self.seek(pos)
        self.readHeader()
        self.readTableDirectory()
        self.subfontNameX = bytestr('-'+str(subfontIndex))

    def readTableDirectory(self):
        """Reads the table directory at the read position.

        Sets numTables, searchRange, entrySelector and rangeShift, the list
        tables of directory records (dictionaries with the keys 'tag',
        'checksum', 'offset' and 'length') and the dictionary table that maps
        a tag to its record.  Validates the table checksums when validate is
        set.

        Raises TTFError when the directory cannot be read.
        """
        try:
            self.numTables = self.read_ushort()
            self.searchRange = self.read_ushort()
            self.entrySelector = self.read_ushort()
            self.rangeShift = self.read_ushort()

            # Read table directory
            self.table = {}
            self.tables = []
            for n in xrange(self.numTables):
                record = {}
                record['tag'] = self.read_tag()
                record['checksum'] = self.read_ulong()
                record['offset'] = self.read_ulong()
                record['length'] = self.read_ulong()
                self.tables.append(record)
                self.table[record['tag']] = record
        except Exception:
            # any failure to read means truncated or damaged data; things like
            # KeyboardInterrupt are deliberately not converted
            raise TTFError('Corrupt %s file "%s" cannot read Table Directory' % (self.fileKind, self.filename))
        if self.validate: self.checksumTables()

    def readHeader(self):
        '''read the sfnt header at the current position

        Sets version and returns true when the version is the last entry of
        ttfVersions, which is the collection tag until readTTCHeader has
        removed it.

        Raises TTFError when the version cannot be read, when it is 0x4F54544F
        (the bytes 'OTTO', reported as postscript outlines) or when it is not
        one of ttfVersions.
        '''
        try:
            self.version = version = self.read_ulong()
        except Exception:
            raise TTFError('"%s" is not a %s file: can\'t read version' %(self.filename,self.fileKind))

        if version==0x4F54544F:
            raise TTFError('%s file "%s": postscript outlines are not supported'%(self.fileKind,self.filename))

        if version not in self.ttfVersions:
            raise TTFError('Not a recognized TrueType font: version=0x%8.8X' % version)
        return version==self.ttfVersions[-1]

    def readFile(self,f):
        """Loads the font data and moves the read position to the start.

        f is either an object with a read method or a filename that is
        opened with TTFOpenFile.  The data is loaded only if this instance
        has no _ttf_data yet; filename is set at the same time.
        """
        if not hasattr(self,'_ttf_data'):
            if hasattr(f,'read'):
                self.filename = getattr(f,'name','(ttf)')   #good idea Marius
                self._ttf_data = f.read()
            else:
                self.filename, f = TTFOpenFile(f)
                # we opened this file ourselves so we must close it, even
                # when reading fails
                try:
                    self._ttf_data = f.read()
                finally:
                    f.close()
        self._pos = 0

    def checksumTables(self):
        """Checks the checksum of every table against the value recorded in
        the table directory and raises TTFError on the first mismatch."""
        # Check the checksums for all tables
        for t in self.tables:
            table = self.get_chunk(t['offset'], t['length'])
            checksum = calcChecksum(table)
            if t['tag'] == 'head':
                # the long at offset 8 of the head table is taken out of the
                # sum again before comparing
                adjustment = unpack('>l', table[8:8+4])[0]
                checksum = add32(checksum, -adjustment)
            xchecksum = t['checksum']
            if xchecksum != checksum:
                raise TTFError('TTF file "%s": invalid checksum %s table: %s (expected %s)' % (self.filename,hex32(checksum),t['tag'],hex32(xchecksum)))

    def checksumFile(self):
        """Checks that the checksum of the whole file is 0xB1B0AFBA and
        raises TTFError if it is not."""
        # Check the checksums for the whole file
        checksum = calcChecksum(self._ttf_data)
        if 0xB1B0AFBA!=checksum:
            raise TTFError('TTF file "%s": invalid checksum %s (expected 0xB1B0AFBA) len: %d &3: %d' % (self.filename,hex32(checksum),len(self._ttf_data),(len(self._ttf_data)&3)))

    def get_table_pos(self, tag):
        "Returns the offset and size of a given TTF table."
        record = self.table[tag]
        return (record['offset'], record['length'])

    def seek(self, pos):
        "Moves read pointer to a given offset in file."
        self._pos = pos

    def skip(self, delta):
        "Skip the given number of bytes."
        self._pos = self._pos + delta

    def seek_table(self, tag, offset_in_table = 0):
        """Moves read pointer to the given offset within a given table and
        returns absolute offset of that position in the file."""
        self._pos = self.get_table_pos(tag)[0] + offset_in_table
        return self._pos

    # Only the methods that have to deal with single bytes or with text
    # differ between Python 3 and Python 2.
    if isPy3:
        def read_tag(self):
            "Read a 4-character tag"
            self._pos += 4
            return str(self._ttf_data[self._pos - 4:self._pos],'utf8')

        def get_chunk(self, pos, length):
            "Return a chunk of raw data at given position"
            return bytes(self._ttf_data[pos:pos+length])

        def read_uint8(self):
            "Reads an unsigned byte"
            self._pos += 1
            return int(self._ttf_data[self._pos-1])
    else:
        def read_tag(self):
            "Read a 4-character tag"
            self._pos += 4
            return self._ttf_data[self._pos - 4:self._pos]

        def get_chunk(self, pos, length):
            "Return a chunk of raw data at given position"
            return self._ttf_data[pos:pos+length]

        def read_uint8(self):
            "Reads an unsigned byte"
            self._pos += 1
            return ord(self._ttf_data[self._pos-1])

    def read_ushort(self):
        "Reads an unsigned short"
        self._pos += 2
        return unpack('>H',self._ttf_data[self._pos-2:self._pos])[0]

    def read_ulong(self):
        "Reads an unsigned long"
        self._pos += 4
        return unpack('>L',self._ttf_data[self._pos - 4:self._pos])[0]

    def read_short(self):
        "Reads a signed short; a failure to unpack is raised as TTFError"
        self._pos += 2
        try:
            return unpack('>h',self._ttf_data[self._pos-2:self._pos])[0]
        except structError as error:
            raise TTFError(error)

    def get_ushort(self, pos):
        "Return an unsigned short at given position"
        return unpack('>H',self._ttf_data[pos:pos+2])[0]

    def get_ulong(self, pos):
        "Return an unsigned long at given position"
        return unpack('>L',self._ttf_data[pos:pos+4])[0]

    def get_table(self, tag):
        "Return the given TTF table"
        pos, length = self.get_table_pos(tag)
        return self._ttf_data[pos:pos+length]
355
class TTFontMaker:
    """Basic TTF file generator

    Tables are collected with add and put together into one font file by
    makeStream.
    """

    def __init__(self):
        "Initializes the generator."
        self.tables = {}

    def add(self, tag, data):
        """Adds a table to the TTF file.

        tag is the table tag and data the table contents.  Adding a tag a
        second time replaces the earlier table.  In the 'head' table the
        four bytes at offset 8 are set to zero; makeStream later stores the
        checksum adjustment of the whole file at that place.
        """
        if tag == 'head':
            data = splice(data, 8, b'\0\0\0\0')
        self.tables[tag] = data

    def makeStream(self):
        """Finishes the generation and returns the TTF file as the contents
        of the buffer it was written to.

        The file consists of the header, the table directory sorted by tag
        and the table data, each table padded with zero bytes to a multiple
        of four bytes.

        Raises TTFError when no 'head' table was added, as there is then no
        place to store the checksum adjustment.
        """
        tables = self.tables
        if 'head' not in tables:
            # without this check the code below failed with an unbound
            # head_start only after the whole file had been written
            raise TTFError("Cannot generate a TTF file without a 'head' table")

        stm = getBytesIO()
        write = stm.write

        numTables = len(tables)
        # searchRange is 16 times the largest power of two that is not
        # bigger than numTables, entrySelector the exponent of that power
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= numTables:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        searchRange = searchRange * 16
        rangeShift = numTables * 16 - searchRange

        # Header
        write(pack(">lHHHH", 0x00010000, numTables, searchRange,
                                 entrySelector, rangeShift))

        # Table directory
        offset = 12 + numTables * 16
        # the tag is text and has to be written as bytes in Python 3; the
        # helper converts the value it is given and not the loop variable
        wStr = (lambda x:write(bytes(x,'latin1'))) if isPy3 else write
        tables_items = list(sorted(tables.items()))
        head_start = None
        for tag, data in tables_items:
            if tag == 'head':
                head_start = offset
            checksum = calcChecksum(data)
            wStr(tag)
            write(pack(">LLL", checksum, offset, len(data)))
            paddedLength = (len(data)+3)&~3
            offset = offset + paddedLength

        # Table data
        for tag, data in tables_items:
            # pad a copy: += could change a bytearray held by the caller
            padded = data + b"\0\0\0"
            write(padded[:len(padded)&~3])

        # the adjustment stored in the head table brings the checksum of the
        # complete file to 0xB1B0AFBA
        checksum = calcChecksum(stm.getvalue())
        checksum = add32(0xB1B0AFBA, -checksum)
        stm.seek(head_start + 8)
        write(pack('>L', checksum))

        return stm.getvalue()
412
#sub header record used in the cmap encoding fmt==2 case
CMapFmt2SubHeader = namedtuple(
    'CMapFmt2SubHeader',
    ['firstCode', 'entryCount', 'idDelta', 'idRangeOffset'],
    )
415
class TTFNameBytes(bytesT):
    '''class used to return named strings

    The instance is the UTF-8 encoding of the decoded name and the decoded
    text itself is available as the attribute ustr.
    '''
    def __new__(cls,b,enc='utf8'):
        """Decode b with the encoding enc, falling back to latin1 when that
        fails, and build the instance from the decoded text."""
        try:
            ustr = b.decode(enc)
        except Exception:
            # best effort: latin1 can decode any sequence of bytes.  Things
            # like KeyboardInterrupt are deliberately not swallowed here.
            ustr = b.decode('latin1')
        self = bytesT.__new__(cls,ustr.encode('utf8'))
        self.ustr = ustr
        return self
426
427class TTFontFile(TTFontParser):
428    "TTF file parser and generator"
429    _agfnc = 0
430    _agfnm = {}
431
432    def __init__(self, file, charInfo=1, validate=0,subfontIndex=0):
433        """Loads and parses a TrueType font file.
434
435        file can be a filename or a file object.  If validate is set to a false
436        values, skips checksum validation.  This can save time, especially if
437        the font is large.  See TTFontFile.extractInfo for more information.
438        """
439        if isStr(subfontIndex): #bytes or unicode
440            sfi = 0
441            __dict__ = self.__dict__.copy()
442            while True:
443                TTFontParser.__init__(self, file, validate=validate,subfontIndex=sfi)
444                numSubfonts = self.numSubfonts = self.read_ulong()
445                self.extractInfo(charInfo)
446                if (isBytes(subfontIndex) and subfontIndex==self.name
447                    or subfontIndex==self.name.ustr): #we found it
448                    return
449                if not sfi:
450                    __dict__.update(dict(_ttf_data=self._ttf_data, filename=self.filename))
451                sfi += 1
452                if sfi>=numSubfonts:
453                    raise ValueError('cannot find %r subfont %r' % (self.filename, subfontIndex))
454                self.__dict__.clear()
455                self.__dict__.update(__dict__)
456        else:
457            TTFontParser.__init__(self, file, validate=validate,subfontIndex=subfontIndex)
458            self.extractInfo(charInfo)
459
460    def extractInfo(self, charInfo=1):
461        """
462        Extract typographic information from the loaded font file.
463
464        The following attributes will be set::
465
466            name         PostScript font name
467            flags        Font flags
468            ascent       Typographic ascender in 1/1000ths of a point
469            descent      Typographic descender in 1/1000ths of a point
470            capHeight    Cap height in 1/1000ths of a point (0 if not available)
471            bbox         Glyph bounding box [l,t,r,b] in 1/1000ths of a point
472            _bbox        Glyph bounding box [l,t,r,b] in unitsPerEm
473            unitsPerEm   Glyph units per em
474            italicAngle  Italic angle in degrees ccw
475            stemV        stem weight in 1/1000ths of a point (approximate)
476
477        If charInfo is true, the following will also be set::
478
479            defaultWidth   default glyph width in 1/1000ths of a point
480            charWidths     dictionary of character widths for every supported UCS character
481                           code
482
483        This will only work if the font has a Unicode cmap (platform 3,
484        encoding 1, format 4 or platform 0 any encoding format 4).  Setting
485        charInfo to false avoids this requirement
486
487        """
488        # name - Naming table
489        name_offset = self.seek_table("name")
490        format = self.read_ushort()
491        if format != 0:
492            raise TTFError("Unknown name table format (%d)" % format)
493        numRecords = self.read_ushort()
494        string_data_offset = name_offset + self.read_ushort()
495        names = {1:None,2:None,3:None,4:None,6:None}
496        K = list(names.keys())
497        nameCount = len(names)
498        for i in xrange(numRecords):
499            platformId = self.read_ushort()
500            encodingId = self.read_ushort()
501            languageId = self.read_ushort()
502            nameId = self.read_ushort()
503            length = self.read_ushort()
504            offset = self.read_ushort()
505            if nameId not in K: continue
506            N = None
507            if platformId == 3 and encodingId == 1 and languageId == 0x409: # Microsoft, Unicode, US English, PS Name
508                opos = self._pos
509                try:
510                    self.seek(string_data_offset + offset)
511                    if length % 2 != 0:
512                        raise TTFError("PostScript name is UTF-16BE string of odd length")
513                    N = TTFNameBytes(self.get_chunk(string_data_offset + offset, length),'utf_16_be')
514                finally:
515                    self._pos = opos
516            elif platformId == 1 and encodingId == 0 and languageId == 0: # Macintosh, Roman, English, PS Name
517                # According to OpenType spec, if PS name exists, it must exist
518                # both in MS Unicode and Macintosh Roman formats.  Apparently,
519                # you can find live TTF fonts which only have Macintosh format.
520                N = TTFNameBytes(self.get_chunk(string_data_offset + offset, length),'mac_roman')
521            if N and names[nameId]==None:
522                names[nameId] = N
523                nameCount -= 1
524                if nameCount==0: break
525        if names[6] is not None:
526            psName = names[6]
527        elif names[4] is not None:
528            psName = names[4]
529        # Fine, one last try before we bail.
530        elif names[1] is not None:
531            psName = names[1]
532        else:
533            psName = None
534
535        # Don't just assume, check for None since some shoddy fonts cause crashes here...
536        if not psName:
537            if rl_config.autoGenerateTTFMissingTTFName:
538                fn = self.filename
539                if fn:
540                    bfn = os.path.splitext(os.path.basename(fn))[0]
541                if not fn:
542                    psName = bytestr('_RL_%s_%s_TTF' % (time.time(), self.__class__._agfnc))
543                    self.__class__._agfnc += 1
544                else:
545                    psName = self._agfnm.get(fn,'')
546                    if not psName:
547                        if bfn:
548                            psName = bytestr('_RL_%s_TTF' % bfn)
549                        else:
550                            psName = bytestr('_RL_%s_%s_TTF' % (time.time(), self.__class__._agfnc))
551                            self.__class__._agfnc += 1
552                        self._agfnm[fn] = psName
553            else:
554                raise TTFError("Could not find PostScript font name")
555
556        psName = psName.__class__(psName.replace(b" ", b"-"))  #Dinu Gherman's fix for font names with spaces
557
558        for c in psName:
559            if char2int(c)>126 or c in b' [](){}<>/%':
560                raise TTFError("psName=%r contains invalid character %s" % (psName,ascii(c)))
561        self.name = psName
562        self.familyName = names[1] or psName
563        self.styleName = names[2] or 'Regular'
564        self.fullName = names[4] or psName
565        self.uniqueFontID = names[3] or psName
566
567        # head - Font header table
568        try:
569            self.seek_table("head")
570        except:
571            raise TTFError('head table not found ttf name=%s' % self.name)
572        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
573        if ver_maj != 1:
574            raise TTFError('Unknown head table version %d.%04x' % (ver_maj, ver_min))
575        self.fontRevision = self.read_ushort(), self.read_ushort()
576
577        self.skip(4)
578        magic = self.read_ulong()
579        if magic != 0x5F0F3CF5:
580            raise TTFError('Invalid head table magic %04x' % magic)
581        self.skip(2)
582        self.unitsPerEm = unitsPerEm = self.read_ushort()
583        scale = lambda x, unitsPerEm=unitsPerEm: x * 1000. / unitsPerEm
584        self.skip(16)
585        xMin = self.read_short()
586        yMin = self.read_short()
587        xMax = self.read_short()
588        yMax = self.read_short()
589        self.bbox = list(map(scale, [xMin, yMin, xMax, yMax]))
590        self.skip(3*2)
591        indexToLocFormat = self.read_ushort()
592        glyphDataFormat = self.read_ushort()
593
594        # OS/2 - OS/2 and Windows metrics table
595        # (needs data from head table)
596        subsettingAllowed = True
597        if "OS/2" in self.table:
598            self.seek_table("OS/2")
599            version = self.read_ushort()
600            self.skip(2)
601            usWeightClass = self.read_ushort()
602            self.skip(2)
603            fsType = self.read_ushort()
604            if fsType==0x0002 or (fsType & 0x0300):
605                subsettingAllowed = os.path.basename(self.filename) not in rl_config.allowTTFSubsetting
606            self.skip(58)   #11*2 + 10 + 4*4 + 4 + 3*2
607            sTypoAscender = self.read_short()
608            sTypoDescender = self.read_short()
609            self.ascent = scale(sTypoAscender)      # XXX: for some reason it needs to be multiplied by 1.24--1.28
610            self.descent = scale(sTypoDescender)
611
612            if version > 1:
613                self.skip(16)   #3*2 + 2*4 + 2
614                sCapHeight = self.read_short()
615                self.capHeight = scale(sCapHeight)
616            else:
617                self.capHeight = self.ascent
618        else:
619            # Microsoft TTFs require an OS/2 table; Apple ones do not.  Try to
620            # cope.  The data is not very important anyway.
621            usWeightClass = 500
622            self.ascent = scale(yMax)
623            self.descent = scale(yMin)
624            self.capHeight = self.ascent
625
626        # There's no way to get stemV from a TTF file short of analyzing actual outline data
627        # This fuzzy formula is taken from pdflib sources, but we could just use 0 here
628        self.stemV = 50 + int((usWeightClass / 65.0) ** 2)
629
630        # post - PostScript table
631        # (needs data from OS/2 table)
632        self.seek_table("post")
633        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
634        if ver_maj not in (1, 2, 3, 4):
635            # Adobe/MS documents 1, 2, 2.5, 3; Apple also has 4.
636            # From Apple docs it seems that we do not need to care
637            # about the exact version, so if you get this error, you can
638            # try to remove this check altogether.
639            raise TTFError('Unknown post table version %d.%04x' % (ver_maj, ver_min))
640        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
641        self.underlinePosition = self.read_short()
642        self.underlineThickness = self.read_short()
643        isFixedPitch = self.read_ulong()
644
645        self.flags = FF_SYMBOLIC        # All fonts that contain characters
646                                        # outside the original Adobe character
647                                        # set are considered "symbolic".
648        if self.italicAngle!= 0:
649            self.flags = self.flags | FF_ITALIC
650        if usWeightClass >= 600:        # FW_REGULAR == 500, FW_SEMIBOLD == 600
651            self.flags = self.flags | FF_FORCEBOLD
652        if isFixedPitch:
653            self.flags = self.flags | FF_FIXED
654        # XXX: FF_SERIF?  FF_SCRIPT?  FF_ALLCAP?  FF_SMALLCAP?
655
656        # hhea - Horizontal header table
657        self.seek_table("hhea")
658        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
659        if ver_maj != 1:
660            raise TTFError('Unknown hhea table version %d.%04x' % (ver_maj, ver_min))
661        self.skip(28)
662        metricDataFormat = self.read_ushort()
663        if metricDataFormat != 0:
664            raise TTFError('Unknown horizontal metric data format (%d)' % metricDataFormat)
665        numberOfHMetrics = self.read_ushort()
666        if numberOfHMetrics == 0:
667            raise TTFError('Number of horizontal metrics is 0')
668
669        # maxp - Maximum profile table
670        self.seek_table("maxp")
671        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
672        if ver_maj != 1:
673            raise TTFError('Unknown maxp table version %d.%04x' % (ver_maj, ver_min))
674        self.numGlyphs = numGlyphs = self.read_ushort()
675        if not subsettingAllowed:
676            if self.numGlyphs>0xFF:
677                raise TTFError('Font does not allow subsetting/embedding (%04X)' % fsType)
678            else:
679                self._full_font = True
680        else:
681            self._full_font = False
682
683        if not charInfo:
684            self.charToGlyph = None
685            self.defaultWidth = None
686            self.charWidths = None
687            return
688
689        if glyphDataFormat != 0:
690            raise TTFError('Unknown glyph data format (%d)' % glyphDataFormat)
691
692        # cmap - Character to glyph index mapping table
693        cmap_offset = self.seek_table("cmap")
694        cmapVersion = self.read_ushort()
695        cmapTableCount = self.read_ushort()
696        if cmapTableCount==0 and cmapVersion!=0:
697            cmapTableCount, cmapVersion = cmapVersion, cmapTableCount
698        encoffs = None
699        enc = 0
700        for n in xrange(cmapTableCount):
701            platform = self.read_ushort()
702            encoding = self.read_ushort()
703            offset = self.read_ulong()
704            if platform==3:
705                enc = 1
706                encoffs = offset
707            elif platform==1 and encoding==0 and enc!=1:
708                enc = 2
709                encoffs = offset
710            elif platform==1 and encoding==1:
711                enc = 1
712                encoffs = offset
713            elif platform==0 and encoding!=5:
714                enc = 1
715                encoffs = offset
716        if encoffs is None:
717            raise TTFError('could not find a suitable cmap encoding')
718        encoffs += cmap_offset
719        self.seek(encoffs)
720        fmt = self.read_ushort()
721        self.charToGlyph = charToGlyph = {}
722        glyphToChar = {}
723        if fmt in (13,12,10,8):
724            self.skip(2)    #padding
725            length = self.read_ulong()
726            lang = self.read_ulong()
727        else:
728            length = self.read_ushort()
729            lang = self.read_ushort()
730        if fmt==0:
731            T = [self.read_uint8() for i in xrange(length-6)]
732            for unichar in xrange(min(256,self.numGlyphs,len(table))):
733                glyph = T[glyph]
734                charToGlyph[unichar] = glyph
735                glyphToChar.setdefault(glyph,[]).append(unichar)
736        elif fmt==4:
737            limit = encoffs + length
738            segCount = int(self.read_ushort() / 2.0)
739            self.skip(6)
740            endCount = [self.read_ushort() for _ in xrange(segCount)]
741            self.skip(2)
742            startCount = [self.read_ushort() for _ in xrange(segCount)]
743            idDelta = [self.read_short() for _ in xrange(segCount)]
744            idRangeOffset_start = self._pos
745            idRangeOffset = [self.read_ushort() for _ in xrange(segCount)]
746
747            # Now it gets tricky.
748            for n in xrange(segCount):
749                for unichar in xrange(startCount[n], endCount[n] + 1):
750                    if idRangeOffset[n] == 0:
751                        glyph = (unichar + idDelta[n]) & 0xFFFF
752                    else:
753                        offset = (unichar - startCount[n]) * 2 + idRangeOffset[n]
754                        offset = idRangeOffset_start + 2 * n + offset
755                        if offset >= limit:
756                            # workaround for broken fonts (like Thryomanes)
757                            glyph = 0
758                        else:
759                            glyph = self.get_ushort(offset)
760                            if glyph != 0:
761                                glyph = (glyph + idDelta[n]) & 0xFFFF
762                    charToGlyph[unichar] = glyph
763                    glyphToChar.setdefault(glyph,[]).append(unichar)
764        elif fmt==6:
765            first = self.read_ushort()
766            count = self.read_ushort()
767            for glyph in xrange(first,first+count):
768                unichar = self.read_ushort()
769                charToGlyph[unichar] = glyph
770                glyphToChar.setdefault(glyph,[]).append(unichar)
771        elif fmt==10:
772            first = self.read_ulong()
773            count = self.read_ulong()
774            for glyph in xrange(first,first+count):
775                unichar = self.read_ushort()
776                charToGlyph[unichar] = glyph
777                glyphToChar.setdefault(glyph,[]).append(unichar)
778        elif fmt==12:
779            segCount = self.read_ulong()
780            for n in xrange(segCount):
781                start = self.read_ulong()
782                end = self.read_ulong()
783                inc = self.read_ulong() - start
784                for unichar in xrange(start,end+1):
785                    glyph = unichar + inc
786                    charToGlyph[unichar] = glyph
787                    glyphToChar.setdefault(glyph,[]).append(unichar)
788        elif fmt==13:
789            segCount = self.read_ulong()
790            for n in xrange(segCount):
791                start = self.read_ulong()
792                end = self.read_ulong()
793                gid = self.read_ulong()
794                for unichar in xrange(start,end+1):
795                    charToGlyph[unichar] = gid
796                    glyphToChar.setdefault(gid,[]).append(unichar)
797        elif fmt==2:
798            T = [self.read_ushort() for i in xrange(256)]   #subheader keys
799            maxSHK = max(T)
800            SH = []
801            for i in xrange(maxSHK+1):
802                firstCode = self.read_ushort()
803                entryCount = self.read_ushort()
804                idDelta = self.read_ushort()
805                idRangeOffset = (self.read_ushort()-(maxSHK-i)*8-2)>>1
806                SH.append(CMapFmt2SubHeader(firstCode,entryCount,idDelta,idRangeOffset))
807            #number of glyph indexes to read. it is the length of the entire subtable minus that bit we've read so far
808            entryCount = (length-(self._pos-(cmap_offset+encoffs)))>>1
809            glyphs = [self.read_short() for i in xrange(entryCount)]
810            last = -1
811            for unichar in xrange(256):
812                if T[unichar]==0:
813                    #Special case, single byte encoding entry, look unichar up in subhead
814                    if last!=-1:
815                        glyph = 0
816                    elif (unichar<SH[0].firstCode or unichar>=SH[0].firstCode+SH[0].entryCount or
817                            SH[0].idRangeOffset+(unichar-SH[0].firstCode)>=entryCount):
818                        glyph = 0
819                    else:
820                        glyph = glyphs[SH[0].idRangeOffset+(unichar-SH[0].firstCode)]
821                        if glyph!=0:
822                            glyph += SH[0].idDelta
823                    #assume the single byte codes are ascii
824                    if glyph!=0 and glyph<self.numGlyphs:
825                        charToGlyph[unichar] = glyph
826                        glyphToChar.setdefault(glyph,[]).append(unichar)
827                else:
828                    k = T[unichar]
829                    for j in xrange(SH[k].entryCount):
830                        if SH[k].idRangeOffset+j>=entryCount:
831                            glyph = 0
832                        else:
833                            glyph = glyphs[SH[k].idRangeOffset+j]
834                            if glyph!= 0:
835                                glyph += SH[k].idDelta
836                        if glyph!=0 and glyph<self.numGlyphs:
837                            enc = (unichar<<8)|(j+SH[k].firstCode)
838                            charToGlyph[enc] = glyph
839                            glyphToChar.setdefault(glyph,[]).append(enc)
840                    if last==-1:
841                        last = unichar
842        else:
843            raise ValueError('Unsupported cmap encoding format %d' % fmt)
844
845        # hmtx - Horizontal metrics table
846        # (needs data from hhea, maxp, and cmap tables)
847        self.seek_table("hmtx")
848        aw = None
849        self.charWidths = charWidths = {}
850        self.hmetrics = []
851        for glyph in xrange(numberOfHMetrics):
852            # advance width and left side bearing.  lsb is actually signed
853            # short, but we don't need it anyway (except for subsetting)
854            aw, lsb = self.read_ushort(), self.read_ushort()
855            self.hmetrics.append((aw, lsb))
856            aw = scale(aw)
857            if glyph == 0:
858                self.defaultWidth = aw
859            if glyph in glyphToChar:
860                for char in glyphToChar[glyph]:
861                    charWidths[char] = aw
862        for glyph in xrange(numberOfHMetrics, numGlyphs):
863            # the rest of the table only lists advance left side bearings.
864            # so we reuse aw set by the last iteration of the previous loop
865            lsb = self.read_ushort()
866            self.hmetrics.append((aw, lsb))
867            if glyph in glyphToChar:
868                for char in glyphToChar[glyph]:
869                    charWidths[char] = aw
870
871        # loca - Index to location
872        if 'loca' not in self.table: raise TTFError('missing location table')
873        self.seek_table('loca')
874        self.glyphPos = []
875        if indexToLocFormat == 0:
876            for n in xrange(numGlyphs + 1):
877                self.glyphPos.append(self.read_ushort() << 1)
878        elif indexToLocFormat == 1:
879            for n in xrange(numGlyphs + 1):
880                self.glyphPos.append(self.read_ulong())
881        else:
882            raise TTFError('Unknown location table format (%d)' % indexToLocFormat)
883        if 0x20 in charToGlyph:
884            charToGlyph[0xa0] = charToGlyph[0x20]
885            charWidths[0xa0] = charWidths[0x20]
886        elif 0xa0 in charToGlyph:
887            charToGlyph[0x20] = charToGlyph[0xa0]
888            charWidths[0x20] = charWidths[0xa0]
889
890    # Subsetting
891
892    def makeSubset(self, subset):
893        """Create a subset of a TrueType font"""
894        output = TTFontMaker()
895
896        # Build a mapping of glyphs in the subset to glyph numbers in
897        # the original font.  Also build a mapping of UCS codes to
898        # glyph values in the new font.
899
900        # Start with 0 -> 0: "missing character"
901        glyphMap = [0]                  # new glyph index -> old glyph index
902        glyphSet = {0:0}                # old glyph index -> new glyph index
903        codeToGlyph = {}                # unicode -> new glyph index
904        for code in subset:
905            if code in self.charToGlyph:
906                originalGlyphIdx = self.charToGlyph[code]
907            else:
908                originalGlyphIdx = 0
909            if originalGlyphIdx not in glyphSet:
910                glyphSet[originalGlyphIdx] = len(glyphMap)
911                glyphMap.append(originalGlyphIdx)
912            codeToGlyph[code] = glyphSet[originalGlyphIdx]
913
914        # Also include glyphs that are parts of composite glyphs
915        start = self.get_table_pos('glyf')[0]
916        n = 0
917        while n < len(glyphMap):
918            originalGlyphIdx = glyphMap[n]
919            glyphPos = self.glyphPos[originalGlyphIdx]
920            glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
921            n += 1
922            if not glyphLen: continue
923            self.seek(start + glyphPos)
924            numberOfContours = self.read_short()
925            if numberOfContours < 0:
926                # composite glyph
927                self.skip(8)
928                flags = GF_MORE_COMPONENTS
929                while flags & GF_MORE_COMPONENTS:
930                    flags = self.read_ushort()
931                    glyphIdx = self.read_ushort()
932                    if glyphIdx not in glyphSet:
933                        glyphSet[glyphIdx] = len(glyphMap)
934                        glyphMap.append(glyphIdx)
935                    if flags & GF_ARG_1_AND_2_ARE_WORDS:
936                        self.skip(4)
937                    else:
938                        self.skip(2)
939                    if flags & GF_WE_HAVE_A_SCALE:
940                        self.skip(2)
941                    elif flags & GF_WE_HAVE_AN_X_AND_Y_SCALE:
942                        self.skip(4)
943                    elif flags & GF_WE_HAVE_A_TWO_BY_TWO:
944                        self.skip(8)
945
946
947        # The following tables are simply copied from the original
948        for tag in ('name', 'OS/2', 'cvt ', 'fpgm', 'prep'):
949            try:
950                output.add(tag, self.get_table(tag))
951            except KeyError:
952                # Apparently some of the tables are optional (cvt, fpgm, prep).
953                # The lack of the required ones (name, OS/2) would have already
954                # been caught before.
955                pass
956
957        # post - PostScript
958        post = b"\x00\x03\x00\x00" + self.get_table('post')[4:16] + b"\x00" * 16
959        output.add('post', post)
960
961        numGlyphs = len(glyphMap)
962
963        # hmtx - Horizontal Metrics
964        hmtx = []
965        for n in xrange(numGlyphs):
966            aw, lsb = self.hmetrics[glyphMap[n]]
967            hmtx.append(int(aw))
968            hmtx.append(int(lsb))
969
970        #work out n as 0 or first aw that's the start of a run
971        n = len(hmtx)-2
972        while n and hmtx[n]==hmtx[n-2]:
973            n -= 2
974        if not n: n = 2                 #need at least one pair
975        numberOfHMetrics = n>>1         #number of full H Metric pairs
976        hmtx = hmtx[:n] + hmtx[n+1::2]  #full pairs + all the trailing lsb's
977
978        hmtx = pack(*([">%dH" % len(hmtx)] + hmtx))
979        output.add('hmtx', hmtx)
980
981        # hhea - Horizontal Header
982        hhea = self.get_table('hhea')
983        hhea = _set_ushort(hhea, 34, numberOfHMetrics)
984        output.add('hhea', hhea)
985
986        # maxp - Maximum Profile
987        maxp = self.get_table('maxp')
988        maxp = _set_ushort(maxp, 4, numGlyphs)
989        output.add('maxp', maxp)
990
991        # cmap - Character to glyph mapping
992        # XXX maybe use format 0 if possible, not 6?
993        entryCount = len(subset)
994        length = 10 + entryCount * 2
995        cmap = [0, 1,           # version, number of tables
996                1, 0, 0,12,     # platform, encoding, offset (hi,lo)
997                6, length, 0,   # format, length, language
998                0,
999                entryCount] + \
1000               list(map(codeToGlyph.get, subset))
1001        cmap = pack(*([">%dH" % len(cmap)] + cmap))
1002        output.add('cmap', cmap)
1003
1004        # glyf - Glyph data
1005        glyphData = self.get_table('glyf')
1006        offsets = []
1007        glyf = []
1008        pos = 0
1009        for n in xrange(numGlyphs):
1010            offsets.append(pos)
1011            originalGlyphIdx = glyphMap[n]
1012            glyphPos = self.glyphPos[originalGlyphIdx]
1013            glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
1014            data = glyphData[glyphPos:glyphPos+glyphLen]
1015            # Fix references in composite glyphs
1016            if glyphLen > 2 and unpack(">h", data[:2])[0] < 0:
1017                # composite glyph
1018                pos_in_glyph = 10
1019                flags = GF_MORE_COMPONENTS
1020                while flags & GF_MORE_COMPONENTS:
1021                    flags = unpack(">H", data[pos_in_glyph:pos_in_glyph+2])[0]
1022                    glyphIdx = unpack(">H", data[pos_in_glyph+2:pos_in_glyph+4])[0]
1023                    data = _set_ushort(data, pos_in_glyph + 2, glyphSet[glyphIdx])
1024                    pos_in_glyph = pos_in_glyph + 4
1025                    if flags & GF_ARG_1_AND_2_ARE_WORDS:
1026                        pos_in_glyph = pos_in_glyph + 4
1027                    else:
1028                        pos_in_glyph = pos_in_glyph + 2
1029                    if flags & GF_WE_HAVE_A_SCALE:
1030                        pos_in_glyph = pos_in_glyph + 2
1031                    elif flags & GF_WE_HAVE_AN_X_AND_Y_SCALE:
1032                        pos_in_glyph = pos_in_glyph + 4
1033                    elif flags & GF_WE_HAVE_A_TWO_BY_TWO:
1034                        pos_in_glyph = pos_in_glyph + 8
1035            glyf.append(data)
1036            pos = pos + glyphLen
1037            if pos % 4 != 0:
1038                padding = 4 - pos % 4
1039                glyf.append(b'\0' * padding)
1040                pos = pos + padding
1041        offsets.append(pos)
1042        output.add('glyf', b''.join(glyf))
1043
1044        # loca - Index to location
1045        loca = []
1046        if (pos + 1) >> 1 > 0xFFFF:
1047            indexToLocFormat = 1        # long format
1048            for offset in offsets:
1049                loca.append(offset)
1050            loca = pack(*([">%dL" % len(loca)] + loca))
1051        else:
1052            indexToLocFormat = 0        # short format
1053            for offset in offsets:
1054                loca.append(offset >> 1)
1055            loca = pack(*([">%dH" % len(loca)] + loca))
1056        output.add('loca', loca)
1057
1058        # head - Font header
1059        head = self.get_table('head')
1060        head = _set_ushort(head, 50, indexToLocFormat)
1061        output.add('head', head)
1062
1063        return output.makeStream()
1064
1065
1066#
1067# TrueType font embedding
1068#
1069
# PDF font descriptor flags (see PDF Reference Guide table 5.19).
# Flag number N occupies bit N-1, i.e. has the value 1 << (N-1).
FF_FIXED = 1 << 0               # flag 1
FF_SERIF = 1 << 1               # flag 2
FF_SYMBOLIC = 1 << 2            # flag 3
FF_SCRIPT = 1 << 3              # flag 4
FF_NONSYMBOLIC = 1 << 5         # flag 6
FF_ITALIC = 1 << 6              # flag 7
FF_ALLCAP = 1 << 16             # flag 17
FF_SMALLCAP = 1 << 17           # flag 18
FF_FORCEBOLD = 1 << 18          # flag 19
1080
class TTFontFace(TTFontFile, pdfmetrics.TypeFace):
    """Typeface built from a TrueType font file.

    It plays the same role as a single byte typeface, except that glyphs
    are looked up by UCS character code rather than by glyph name."""

    def __init__(self, filename, validate=0, subfontIndex=0):
        "Loads a TrueType font from filename."
        pdfmetrics.TypeFace.__init__(self, None)
        TTFontFile.__init__(self, filename, validate=validate, subfontIndex=subfontIndex)

    def getCharWidth(self, code):
        "Returns the width of character U+<code>, or the default width if it has none"
        widths = self.charWidths
        fallback = self.defaultWidth
        return widths.get(code, fallback)

    def addSubsetObjects(self, doc, fontname, subset):
        """Build a subset of this font and register it with the PDF document.

        The subset made from the character codes in subset is stored as a
        font file stream (compressed when doc.compression is set) and
        referenced from a new FontDescriptor dictionary.  Returns a
        PDFReference to that FontDescriptor."""

        embedded = pdfdoc.PDFStream()
        embedded.content = self.makeSubset(subset)
        embedded.dictionary['Length1'] = len(embedded.content)
        if doc.compression:
            embedded.filters = [pdfdoc.PDFZCompress]
        embeddedRef = doc.Reference(embedded, 'fontFile:%s(%s)' % (self.filename, fontname))

        # the descriptor always carries the symbolic flag and never the
        # nonsymbolic one
        descriptorFlags = (self.flags & ~FF_NONSYMBOLIC) | FF_SYMBOLIC

        entries = {}
        entries['Type'] = '/FontDescriptor'
        entries['Ascent'] = self.ascent
        entries['CapHeight'] = self.capHeight
        entries['Descent'] = self.descent
        entries['Flags'] = descriptorFlags
        entries['FontBBox'] = pdfdoc.PDFArray(self.bbox)
        entries['FontName'] = pdfdoc.PDFName(fontname)
        entries['ItalicAngle'] = self.italicAngle
        entries['StemV'] = self.stemV
        entries['FontFile2'] = embeddedRef
        descriptor = pdfdoc.PDFDictionary(entries)
        return doc.Reference(descriptor, 'fontDescriptor:' + fontname)
1123
class TTEncoding:
    """Encoding object used by TrueType fonts; its name is always UTF-8.

    It takes no direct part in PDF object creation: every generated font
    subset needs its own 8-bit encoding, and TTFont looks after those
    itself."""

    def __init__(self):
        # the only state is the fixed encoding name
        self.name = 'UTF-8'
1133
class TTFont:
    """Represents a TrueType font.

    Its encoding is always UTF-8.

    Note: you cannot use the same TTFont object for different documents
    at the same time.

    Example of usage:

        font = ttfonts.TTFont('PostScriptFontName', '/path/to/font.ttf')
        pdfmetrics.registerFont(font)

        canvas.setFont('PostScriptFontName', size)
        canvas.drawString(x, y, "Some text encoded in UTF-8")
    """
    class State:
        """Per-document subsetting state of a TTFont.

        assignments   maps a unicode code point to its assigned code; the
                      assigned code >> 8 is the subset number and its low
                      byte is the character code inside that subset
        subsets       list of subsets, each a list of unicode code points
        nextCode      the next assigned code to hand out
        internalName  name of the PDF font resource, filled in by
                      TTFont.getSubsetInternalName
        frozen        true when no new characters may be added
        """
        namePrefix = 'F'
        def __init__(self,asciiReadable=None,ttf=None):
            A = self.assignments = {}
            self.nextCode = 0
            self.internalName = None
            self.frozen = 0
            face = getattr(ttf,'face',None)
            if getattr(face,'_full_font',None):
                # Whole font requested: every mapped character is assigned
                # its own code point and nothing can be added afterwards.
                # The character map belongs to the face, not to the State.
                C = set(face.charToGlyph)
                C.discard(0xa0)
                for n in range(256):
                    if n in C:
                        A[n] = n
                        C.remove(n)
                for n in C:
                    A[n] = n
                self.subsets = [[n for n in A]]
                self.frozen = True
                return

            if asciiReadable is None:
                asciiReadable = rl_config.ttfAsciiReadable

            if asciiReadable:
                # Let's add the first 128 unicodes to the 0th subset, so ' '
                # always has code 32 (for word spacing to work) and the ASCII
                # output is readable
                subset0 = list(range(128))
                self.subsets = [subset0]
                for n in subset0:
                    A[n] = n
                self.nextCode = 128
            else:
                # codes 0-31 are filled in on demand by TTFont.splitString,
                # code 32 is reserved for the space character
                self.subsets = [[32]*33]
                A[32] = 32

    _multiByte = 1      # We want our own stringwidth
    _dynamicFont = 1    # We want dynamic subsetting

    def __init__(self, name, filename, validate=0, subfontIndex=0,asciiReadable=None):
        """Loads a TrueType font from filename.

        If validate is set to a false value, skips checksum validation.  This
        can save time, especially if the font is large.

        asciiReadable defaults to rl_config.ttfAsciiReadable; it is handed
        to every State created for this font.
        """
        self.fontName = name
        self.face = TTFontFace(filename, validate=validate, subfontIndex=subfontIndex)
        self.encoding = TTEncoding()
        from weakref import WeakKeyDictionary
        # doc -> State; weak keys so a finished document drops its state
        self.state = WeakKeyDictionary()
        if asciiReadable is None:
            asciiReadable = rl_config.ttfAsciiReadable
        self._asciiReadable = asciiReadable

    def stringWidth(self,text,size,encoding='utf8'):
        "Returns the result of instanceStringWidthTTF for text at the given size"
        return instanceStringWidthTTF(self,text,size,encoding)

    def _assignState(self,doc,asciiReadable=None,namePrefix=None):
        '''convenience function for those wishing to roll their own state properties

        Returns the State for doc, creating it first if needed.  asciiReadable
        and namePrefix are only applied when a new State is created.'''
        if asciiReadable is None:
            asciiReadable = self._asciiReadable
        try:
            state = self.state[doc]
        except KeyError:
            state = self.state[doc] = TTFont.State(asciiReadable,self)
            if namePrefix is not None:
                state.namePrefix = namePrefix
        return state

    def splitString(self, text, doc, encoding='utf-8'):
        """Splits text into a number of chunks, each of which belongs to a
        single subset.  Returns a list of tuples (subset, string).  Use subset
        numbers with getSubsetInternalName.  Doc is needed for distinguishing
        subsets when building different documents at the same time.

        Text that is not unicode is decoded using encoding.  Characters seen
        for the first time are assigned the next free code; once the state
        is frozen that raises pdfdoc.PDFError instead."""
        state = self._assignState(doc)
        curSet = -1
        cur = []
        results = []
        if not isUnicode(text):
            text = text.decode(encoding)
        assignments = state.assignments
        subsets = state.subsets
        reserveTTFNotdef = rl_config.reserveTTFNotdef
        for code in map(ord,text):
            if code==0xa0: code = 32    #map nbsp into space
            if code in assignments:
                n = assignments[code]
            else:
                if state.frozen:
                    raise pdfdoc.PDFError("Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code))
                n = state.nextCode
                if n&0xFF==32:
                    # make code 32 always be a space character
                    if n!=32: subsets[n >> 8].append(32)
                    state.nextCode += 1
                    n = state.nextCode
                if n>32:
                    if not(n&0xFF):
                        # low byte 0: a new subset starts here
                        if reserveTTFNotdef:
                            subsets.append([0]) #force code 0 in as notdef
                            state.nextCode += 1
                            n = state.nextCode
                        else:
                            subsets.append([])
                    subsets[n >> 8].append(code)
                else:
                    # codes below 32 overwrite the placeholders of subset 0
                    subsets[0][n] = code
                state.nextCode += 1
                assignments[code] = n
            if (n >> 8) != curSet:
                # subset changed: flush the chunk collected so far
                if cur:
                    results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
                curSet = (n >> 8)
                cur = []
            cur.append(n & 0xFF)
        if cur:
            results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
        return results

    def getSubsetInternalName(self, subset, doc):
        """Returns the name of a PDF Font object corresponding to a given
        subset of this dynamic font.  Use this function instead of
        PDFDocument.getInternalFontName.

        Raises IndexError if the subset does not exist.  The first call for
        a document also records the font in doc.fontMapping and
        doc.delayedFonts."""
        state = self._assignState(doc)
        if subset < 0 or subset >= len(state.subsets):
            raise IndexError('Subset %d does not exist in font %s' % (subset, self.fontName))
        if state.internalName is None:
            state.internalName = state.namePrefix +repr(len(doc.fontMapping) + 1)
            doc.fontMapping[self.fontName] = '/' + state.internalName
            doc.delayedFonts.append(self)
        return '/%s+%d' % (state.internalName, subset)

    def addObjects(self, doc):
        """Makes  one or more PDF objects to be added to the document.  The
        caller supplies the internal name to be used (typically F1, F2, ... in
        sequence).

        This method creates a number of Font and FontDescriptor objects.  Every
        FontDescriptor is a (no more than) 256 character subset of the original
        TrueType font.

        The state kept for doc is frozen and then discarded."""
        state = self._assignState(doc)
        state.frozen = 1
        for n,subset in enumerate(state.subsets):
            # drop the leading '/' of the resource name
            internalName = self.getSubsetInternalName(n, doc)[1:]
            baseFontName = (b''.join((SUBSETN(n),b'+',self.face.name,self.face.subfontNameX))).decode('pdfdoc')

            pdfFont = pdfdoc.PDFTrueTypeFont()
            pdfFont.__Comment__ = 'Font %s subset %d' % (self.fontName, n)
            pdfFont.Name = internalName
            pdfFont.BaseFont = baseFontName

            pdfFont.FirstChar = 0
            pdfFont.LastChar = len(subset) - 1

            widths = list(map(self.face.getCharWidth, subset))
            pdfFont.Widths = pdfdoc.PDFArray(widths)

            cmapStream = pdfdoc.PDFStream()
            cmapStream.content = makeToUnicodeCMap(baseFontName, subset)
            if doc.compression:
                cmapStream.filters = [pdfdoc.PDFZCompress]
            pdfFont.ToUnicode = doc.Reference(cmapStream, 'toUnicodeCMap:' + baseFontName)

            pdfFont.FontDescriptor = self.face.addSubsetObjects(doc, baseFontName, subset)

            # link it in
            doc.Reference(pdfFont, internalName)
            fontDict = doc.idToObject['BasicFonts'].dict
            fontDict[internalName] = pdfFont
        del self.state[doc]
1325
#preserve the initial values here
def _reset():
    """Empty the module-level _cached_ttf_dirs container."""
    _cached_ttf_dirs.clear()

# Hand _reset to rl_config's reset registry -- presumably so that a global
# reportlab reset also empties _cached_ttf_dirs (TODO confirm in rl_config).
from reportlab.rl_config import register_reset
register_reset(_reset)
# the helper is only needed for the registration above; keep it out of the
# module namespace
del register_reset
1333