1#!/usr/bin/env python3
2
3import sys
4import io
5import struct
6from .cmapdb import CMapDB, CMapParser, FileUnicodeMap, CMap
7from .encodingdb import EncodingDB, name2unicode
8from .psparser import PSStackParser
9from .psparser import PSEOF
10from .psparser import LIT, KWD, handle_error
11from .psparser import PSLiteral, literal_name
12from .pdftypes import (PDFException, resolve1, int_value, num_value, list_value, dict_value,
13    stream_value)
14from .fontmetrics import FONT_METRICS
15from .utils import apply_matrix_norm, nunpack, choplist
16
17
def get_widths(seq):
    """Build a ``{cid: width}`` table from the items of a ``W`` array.

    Two entry shapes are recognised while walking *seq*:

    * ``c [w1 w2 ...]`` assigns the listed widths to consecutive codes
      starting at ``c``;
    * ``c_first c_last w`` assigns ``w`` to every code in the range.

    Numbers may be ints or floats.  Numbers left over at the end that do
    not complete an entry are ignored, as is a list that is not preceded
    by a number.

    >>> get_widths([1])
    {}
    >>> get_widths([1, 2, 3])
    {1: 3, 2: 3}
    >>> get_widths([1, [2, 3], 6, [7, 8]])
    {1: 2, 2: 3, 6: 7, 7: 8}
    """
    widths = {}
    pending = []
    for v in seq:
        if isinstance(v, list):
            if pending:
                # The number right before the list is the first code.
                char1 = int(pending[-1])
                for (i, w) in enumerate(v):
                    widths[char1+i] = w
                pending = []
        elif isinstance(v, (int, float)):
            # Floats must be collected too: a real-valued width would
            # otherwise be skipped and the triple would never complete.
            pending.append(v)
            if len(pending) == 3:
                (char1, char2, w) = pending
                for i in range(int(char1), int(char2)+1):
                    widths[i] = w
                pending = []
    return widths
39
def get_widths2(seq):
    """Build a ``{cid: (w, (vx, vy))}`` table from the items of a ``W2`` array.

    Two entry shapes are recognised while walking *seq*:

    * ``c [w1 vx1 vy1 w2 vx2 vy2 ...]`` assigns each group of three
      numbers (grouped with ``choplist(3, ...)``) to consecutive codes
      starting at ``c``;
    * ``c_first c_last w vx vy`` assigns the same metrics to every code
      in the range.

    Numbers may be ints or floats.  Numbers left over at the end that do
    not complete an entry are ignored.

    Checks carried over from the original source:

        get_widths2([1]) == {}
        get_widths2([1,2,3,4,5]) == {1:(3,(4,5)), 2:(3,(4,5))}
        get_widths2([1,[2,3,4,5],6,[7,8,9]]) == {1:(2,(3,4)), 6:(7,(8,9))}
    """
    widths = {}
    pending = []
    for v in seq:
        if isinstance(v, list):
            if pending:
                # The number right before the list is the first code.
                char1 = int(pending[-1])
                for (i, (w, vx, vy)) in enumerate(choplist(3, v)):
                    widths[char1+i] = (w, (vx, vy))
                pending = []
        elif isinstance(v, (int, float)):
            # Floats must be collected too: a real-valued metric would
            # otherwise be skipped and the 5-tuple would never complete.
            pending.append(v)
            if len(pending) == 5:
                (char1, char2, w, vx, vy) = pending
                for i in range(int(char1), int(char2)+1):
                    widths[i] = (w, (vx, vy))
                pending = []
    return widths
61
62
class FontMetricsDB:
    """Lookup front-end for the ``FONT_METRICS`` table imported by this module."""

    @classmethod
    def get_metrics(cls, fontname):
        """Return the ``FONT_METRICS`` entry stored under *fontname*.

        Callers in this file unpack the result as ``(descriptor, widths)``
        and catch ``KeyError`` for names that are not in the table.
        """
        metrics = FONT_METRICS[fontname]
        return metrics
68
69
class Type1FontHeaderParser(PSStackParser):
    """Stack parser that extracts ``code -> unicode`` pairs from font data.

    Every ``put`` keyword whose two preceding operands are an int and a
    literal name yields a ``(code, name)`` result; ``get_encoding`` turns
    those results into a dictionary.
    """

    KEYWORD_BEGIN = KWD('begin')
    KEYWORD_END = KWD('end')
    KEYWORD_DEF = KWD('def')
    KEYWORD_PUT = KWD('put')
    KEYWORD_DICT = KWD('dict')
    KEYWORD_ARRAY = KWD('array')
    KEYWORD_READONLY = KWD('readonly')
    KEYWORD_FOR = KWD('for')

    def __init__(self, data):
        """Start parsing *data* (callers in this file pass an io.BytesIO)."""
        PSStackParser.__init__(self, data)
        self._cid2unicode = {}

    def get_encoding(self):
        """Consume all parsed ``(code, name)`` results and return the mapping.

        Names that ``name2unicode`` rejects with ``KeyError`` are skipped.
        Parsing stops at ``PSEOF``.
        """
        while True:
            try:
                (cid, name) = self.nextobject()
            except PSEOF:
                break
            try:
                self._cid2unicode[cid] = name2unicode(name)
            except KeyError:
                # No Unicode value known for this glyph name; leave it out.
                pass
        return self._cid2unicode

    def do_keyword(self, pos, token):
        """Handle a keyword; only ``put`` is acted upon."""
        if token is self.KEYWORD_PUT:
            ((_, key), (_, value)) = self.pop(2)
            if isinstance(key, int) and isinstance(value, PSLiteral):
                self.add_results((key, literal_name(value)))
104
105
106##  CFFFont
107##  (Format specified in Adobe Technical Note: #5176
108##   "The Compact Font Format Specification")
109##
NIBBLES = ('0','1','2','3','4','5','6','7','8','9','.','e','e-',None,'-')
def getdict(data):
    """Decode CFF DICT data into an ``{operator: [operands]}`` dictionary.

    *data* is a bytes object.  Operands are pushed on a stack until an
    operator byte (0..21) is met; the stack collected so far is then stored
    under that operator and a new stack is started.

    One-byte operators are stored under their integer value.  The escape
    byte 12 introduces a two-byte operator, stored under the tuple
    ``(12, second_byte)`` so that its second byte is not mistaken for an
    operand or for another operator.

    Operand encodings handled:

    * 30: real number, packed as nibbles looked up in ``NIBBLES`` and
      terminated by nibble 0xf;
    * 32..246: small integer ``b0 - 139``;
    * 247..250 / 251..254: two-byte positive / negative integer;
    * 28: signed 16-bit integer;
    * anything else: treated as a prefix followed by a signed 32-bit integer.

    Truncated data makes ``ord()`` fail on the empty read (TypeError).
    """
    d = {}
    fp = io.BytesIO(data)
    stack = []
    while True:
        c = fp.read(1)
        if not c:
            break
        b0 = ord(c)
        if b0 <= 21:
            if b0 == 12:
                # Escape: the operator is identified by the next byte.
                key = (b0, ord(fp.read(1)))
            else:
                key = b0
            d[key] = stack
            stack = []
            continue
        if b0 == 30:
            digits = ''
            more = True
            while more:
                b = ord(fp.read(1))
                for nibble in (b >> 4, b & 15):
                    if nibble == 15:
                        more = False
                    else:
                        digits += NIBBLES[nibble]
            value = float(digits)
        elif 32 <= b0 <= 246:
            value = b0-139
        else:
            b1 = ord(fp.read(1))
            if 247 <= b0 <= 250:
                value = ((b0-247)<<8)+b1+108
            elif 251 <= b0 <= 254:
                value = -((b0-251)<<8)-b1-108
            else:
                b2 = ord(fp.read(1))
                if 128 <= b1:
                    # The leading byte carries the sign.
                    b1 -= 256
                if b0 == 28:
                    value = b1<<8 | b2
                else:
                    value = b1<<24 | b2<<16 | struct.unpack('>H', fp.read(2))[0]
        stack.append(value)
    return d
151
class CFFFont:
    """Reader for a Compact Font Format (CFF) font program.

    After construction the instance exposes:

    * ``name_index``, ``dict_index``, ``string_index``, ``subr_index`` and
      ``charstring``: the INDEX structures read from the file;
    * ``top_dict``: the first Top DICT decoded with ``getdict``;
    * ``nglyphs``: the number of entries in the CharStrings INDEX;
    * ``code2gid`` / ``gid2code``: the custom Encoding table, if any;
    * ``name2gid`` / ``gid2name``: glyph names taken from the charset
      (glyph 0 is not listed).
    """

    STANDARD_STRINGS = (
      '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign',
      'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
      'parenright', 'asterisk', 'plus', 'comma', 'hyphen', 'period',
      'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
      'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
      'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
      'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
      'U', 'V', 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash',
      'bracketright', 'asciicircum', 'underscore', 'quoteleft', 'a',
      'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
      'braceleft', 'bar', 'braceright', 'asciitilde', 'exclamdown',
      'cent', 'sterling', 'fraction', 'yen', 'florin', 'section',
      'currency', 'quotesingle', 'quotedblleft', 'guillemotleft',
      'guilsinglleft', 'guilsinglright', 'fi', 'fl', 'endash',
      'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
      'quotesinglbase', 'quotedblbase', 'quotedblright',
      'guillemotright', 'ellipsis', 'perthousand', 'questiondown',
      'grave', 'acute', 'circumflex', 'tilde', 'macron', 'breve',
      'dotaccent', 'dieresis', 'ring', 'cedilla', 'hungarumlaut',
      'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
      'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash',
      'oslash', 'oe', 'germandbls', 'onesuperior', 'logicalnot', 'mu',
      'trademark', 'Eth', 'onehalf', 'plusminus', 'Thorn',
      'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
      'threequarters', 'twosuperior', 'registered', 'minus', 'eth',
      'multiply', 'threesuperior', 'copyright', 'Aacute',
      'Acircumflex', 'Adieresis', 'Agrave', 'Aring', 'Atilde',
      'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
      'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde',
      'Oacute', 'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde',
      'Scaron', 'Uacute', 'Ucircumflex', 'Udieresis', 'Ugrave',
      'Yacute', 'Ydieresis', 'Zcaron', 'aacute', 'acircumflex',
      'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla', 'eacute',
      'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
      'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex',
      'odieresis', 'ograve', 'otilde', 'scaron', 'uacute',
      'ucircumflex', 'udieresis', 'ugrave', 'yacute', 'ydieresis',
      'zcaron', 'exclamsmall', 'Hungarumlautsmall', 'dollaroldstyle',
      'dollarsuperior', 'ampersandsmall', 'Acutesmall',
      'parenleftsuperior', 'parenrightsuperior', 'twodotenleader',
      'onedotenleader', 'zerooldstyle', 'oneoldstyle', 'twooldstyle',
      'threeoldstyle', 'fouroldstyle', 'fiveoldstyle', 'sixoldstyle',
      'sevenoldstyle', 'eightoldstyle', 'nineoldstyle',
      'commasuperior', 'threequartersemdash', 'periodsuperior',
      'questionsmall', 'asuperior', 'bsuperior', 'centsuperior',
      'dsuperior', 'esuperior', 'isuperior', 'lsuperior', 'msuperior',
      'nsuperior', 'osuperior', 'rsuperior', 'ssuperior', 'tsuperior',
      'ff', 'ffi', 'ffl', 'parenleftinferior', 'parenrightinferior',
      'Circumflexsmall', 'hyphensuperior', 'Gravesmall', 'Asmall',
      'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall',
      'Hsmall', 'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall',
      'Nsmall', 'Osmall', 'Psmall', 'Qsmall', 'Rsmall', 'Ssmall',
      'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall', 'Ysmall',
      'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
      'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall',
      'Zcaronsmall', 'Dieresissmall', 'Brevesmall', 'Caronsmall',
      'Dotaccentsmall', 'Macronsmall', 'figuredash', 'hypheninferior',
      'Ogoneksmall', 'Ringsmall', 'Cedillasmall', 'questiondownsmall',
      'oneeighth', 'threeeighths', 'fiveeighths', 'seveneighths',
      'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
      'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
      'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior',
      'threeinferior', 'fourinferior', 'fiveinferior', 'sixinferior',
      'seveninferior', 'eightinferior', 'nineinferior',
      'centinferior', 'dollarinferior', 'periodinferior',
      'commainferior', 'Agravesmall', 'Aacutesmall',
      'Acircumflexsmall', 'Atildesmall', 'Adieresissmall',
      'Aringsmall', 'AEsmall', 'Ccedillasmall', 'Egravesmall',
      'Eacutesmall', 'Ecircumflexsmall', 'Edieresissmall',
      'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
      'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall',
      'Oacutesmall', 'Ocircumflexsmall', 'Otildesmall',
      'Odieresissmall', 'OEsmall', 'Oslashsmall', 'Ugravesmall',
      'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
      'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000',
      '001.001', '001.002', '001.003', 'Black', 'Bold', 'Book',
      'Light', 'Medium', 'Regular', 'Roman', 'Semibold',
      )

    class INDEX:
        """Sequence-like view over the objects of one CFF INDEX structure.

        Objects are read lazily from the underlying file on item access
        and returned as bytes.
        """

        def __init__(self, fp):
            """Read the INDEX header at the current position of *fp*.

            On return *fp* is positioned just past the INDEX.
            """
            self.fp = fp
            (count,) = struct.unpack(b'>H', self.fp.read(2))
            if count == 0:
                # An empty INDEX is the two-byte count alone: there is no
                # offSize byte and no offset array to read.
                self.offsets = [1]
                self.base = self.fp.tell()-1
                return
            (offsize,) = struct.unpack(b'B', self.fp.read(1))
            self.offsets = [
                int.from_bytes(self.fp.read(offsize), 'big')
                for _ in range(count+1)
            ]
            # Offsets are relative to the byte preceding the object data.
            self.base = self.fp.tell()-1
            self.fp.seek(self.base+self.offsets[-1])

        def __repr__(self):
            return '<INDEX: size=%d>' % len(self)

        def __len__(self):
            return len(self.offsets)-1

        def __getitem__(self, i):
            """Return object *i* as bytes (moves the file position)."""
            self.fp.seek(self.base+self.offsets[i])
            return self.fp.read(self.offsets[i+1]-self.offsets[i])

        def __iter__(self):
            return (self[i] for i in range(len(self)))

    def __init__(self, name, fp):
        """Parse the CFF data readable from the seekable binary file *fp*.

        Raises ValueError for an encoding or charset table whose format
        byte is not supported.
        """
        self.name = name
        self.fp = fp
        # Header: major, minor, hdrSize, offSize; skip any extra header bytes.
        (_major, _minor, hdrsize, _offsize) = struct.unpack(b'BBBB', self.fp.read(4))
        self.fp.read(hdrsize-4)
        # Name, Top DICT, String and Global Subr INDEXes follow in order.
        self.name_index = self.INDEX(self.fp)
        self.dict_index = self.INDEX(self.fp)
        self.string_index = self.INDEX(self.fp)
        self.subr_index = self.INDEX(self.fp)
        # Top DICT DATA: operators 15, 16 and 17 carry the charset,
        # encoding and CharStrings offsets; each defaults to 0.
        self.top_dict = getdict(self.dict_index[0])
        (charset_pos,) = self.top_dict.get(15, [0])
        (encoding_pos,) = self.top_dict.get(16, [0])
        (charstring_pos,) = self.top_dict.get(17, [0])
        # CharStrings
        self.fp.seek(charstring_pos)
        self.charstring = self.INDEX(self.fp)
        self.nglyphs = len(self.charstring)
        self._read_encoding(encoding_pos)
        self._read_charset(charset_pos)

    def _read_encoding(self, offset):
        """Fill ``code2gid`` / ``gid2code`` from the Encoding table at *offset*.

        Offsets 0 and 1 are identifiers of predefined encodings rather than
        file positions; no table is read for them and both maps stay empty.
        Glyph ids are assigned sequentially starting at 1.  The high bit of
        the format byte (supplemental entries) is masked off and the
        supplemental data itself is not read.
        """
        self.code2gid = {}
        self.gid2code = {}
        if offset in (0, 1):
            return
        self.fp.seek(offset)
        raw = self.fp.read(1)
        kind = (raw[0] & 0x7f) if raw else None
        if kind == 0:
            # Format 0: one code per glyph, in glyph order.
            (ncodes,) = struct.unpack(b'B', self.fp.read(1))
            codes = struct.unpack(b'B'*ncodes, self.fp.read(ncodes))
            for (gid, code) in enumerate(codes, 1):
                self.code2gid[code] = gid
                self.gid2code[gid] = code
        elif kind == 1:
            # Format 1: ranges of consecutive codes (first, nLeft).
            (nranges,) = struct.unpack(b'B', self.fp.read(1))
            gid = 1
            for _ in range(nranges):
                (first, nleft) = struct.unpack(b'BB', self.fp.read(2))
                for code in range(first, first+nleft+1):
                    self.code2gid[code] = gid
                    self.gid2code[gid] = code
                    gid += 1
        else:
            raise ValueError('unsupported encoding format: %r' % raw)

    def _read_charset(self, offset):
        """Fill ``name2gid`` / ``gid2name`` from the charset at *offset*.

        Offsets 0, 1 and 2 are identifiers of predefined charsets rather
        than file positions.  For 0 (ISOAdobe) glyph ``i`` is given string
        id ``i``; for 1 and 2 no table is available here and both maps stay
        empty.  Otherwise formats 0, 1 and 2 are read from the file; glyph
        ids are assigned sequentially starting at 1.
        """
        self.name2gid = {}
        self.gid2name = {}
        nnamed = max(self.nglyphs-1, 0)
        if offset == 0:
            sids = range(1, min(nnamed, 228)+1)
        elif offset in (1, 2):
            return
        else:
            self.fp.seek(offset)
            raw = self.fp.read(1)
            if raw == b'\x00':
                # Format 0: one string id per glyph.
                sids = struct.unpack(b'>'+b'H'*nnamed, self.fp.read(2*nnamed))
            elif raw in (b'\x01', b'\x02'):
                # Formats 1 and 2: (first SID, nLeft) ranges until every
                # glyph is covered; nLeft is one byte in format 1 and two
                # bytes in format 2.
                range_fmt = b'>HB' if raw == b'\x01' else b'>HH'
                range_size = struct.calcsize(range_fmt)
                sids = []
                while len(sids) < nnamed:
                    (first, nleft) = struct.unpack(range_fmt, self.fp.read(range_size))
                    sids.extend(range(first, first+nleft+1))
                # Ignore whatever the last range covers beyond the glyph count.
                del sids[nnamed:]
            else:
                raise ValueError('unsupported charset format: %r' % raw)
        for (gid, sid) in enumerate(sids, 1):
            name = self.getstr(sid)
            self.name2gid[name] = gid
            self.gid2name[gid] = name

    def getstr(self, sid):
        """Return the string for string id *sid*.

        Ids below ``len(STANDARD_STRINGS)`` give the standard string (str);
        higher ids are looked up in ``string_index`` and returned as stored
        there (raw bytes when it is an INDEX).
        """
        nstandard = len(self.STANDARD_STRINGS)
        if sid < nstandard:
            return self.STANDARD_STRINGS[sid]
        return self.string_index[sid-nstandard]
342
343
class TrueTypeFont:
    """Minimal TrueType reader: table directory plus 'cmap' decoding.

    ``tables`` maps each 4-byte table tag (bytes, e.g. ``b'cmap'``) to its
    ``(offset, length)`` pair.
    """

    class CMapNotFound(Exception):
        """Raised when the font has no 'cmap' table."""

    def __init__(self, name, fp):
        """Read the table directory from the seekable binary file *fp*."""
        self.name = name
        self.fp = fp
        self.tables = {}
        self.fonttype = fp.read(4)
        (ntables, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8))
        for _ in range(ntables):
            (tag, _checksum, offset, length) = struct.unpack(b'>4sLLL', fp.read(16))
            self.tables[tag] = (offset, length)

    def create_unicode_map(self):
        """Return a FileUnicodeMap built from the font's 'cmap' table.

        Each decoded ``char -> glyph id`` pair is registered with
        ``add_cid2unichr(gid, char)``.  Raises ``TrueTypeFont.CMapNotFound``
        when the font has no 'cmap' table.
        """
        char2gid = self._read_char2gid()
        unicode_map = FileUnicodeMap()
        for (char, gid) in char2gid.items():
            unicode_map.add_cid2unichr(gid, char)
        return unicode_map

    def _read_char2gid(self):
        """Decode the 'cmap' subtables into a ``{char code: glyph id}`` dict.

        Only subtable formats 0, 2 and 4 are decoded; subtables in any
        other format are skipped.  Later subtables override earlier ones
        for the same character code.
        """
        # Table tags are read with struct's '4s' and are therefore bytes.
        if b'cmap' not in self.tables:
            raise TrueTypeFont.CMapNotFound
        (base_offset, _length) = self.tables[b'cmap']
        fp = self.fp
        fp.seek(base_offset)
        (_version, nsubtables) = struct.unpack(b'>HH', fp.read(4))
        subtables = [struct.unpack(b'>HHL', fp.read(8)) for _ in range(nsubtables)]
        char2gid = {}
        for (_1, _2, st_offset) in subtables:
            fp.seek(base_offset+st_offset)
            (fmttype, _fmtlen, _fmtlang) = struct.unpack(b'>HHH', fp.read(6))
            if fmttype == 0:
                char2gid.update(enumerate(struct.unpack(b'>256B', fp.read(256))))
            elif fmttype == 2:
                subheaderkeys = struct.unpack(b'>256H', fp.read(512))
                # Keys are subheader index * 8; remember which first byte
                # selects each subheader.
                firstbytes = [0]*8192
                for (i, k) in enumerate(subheaderkeys):
                    firstbytes[k//8] = i
                nhdrs = max(subheaderkeys)//8 + 1
                hdrs = []
                for i in range(nhdrs):
                    (firstcode, entcount, delta, offset) = struct.unpack(b'>HHhH', fp.read(8))
                    # The range offset is relative to its own position,
                    # i.e. the last two bytes just read.
                    hdrs.append((i, firstcode, entcount, delta, fp.tell()-2+offset))
                for (i, firstcode, entcount, delta, pos) in hdrs:
                    if not entcount:
                        continue
                    first = firstcode + (firstbytes[i] << 8)
                    fp.seek(pos)
                    for c in range(entcount):
                        (gid,) = struct.unpack(b'>H', fp.read(2))
                        if gid:
                            # Zero means "missing glyph" and is kept as is.
                            gid = (gid + delta) & 0xffff
                        char2gid[first+c] = gid
            elif fmttype == 4:
                (segcount, _1, _2, _3) = struct.unpack(b'>HHHH', fp.read(8))
                # The stored value is twice the segment count.
                segcount //= 2
                ecs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
                fp.read(2)  # reserved padding
                scs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
                idds = struct.unpack(b'>%dh' % segcount, fp.read(2*segcount))
                pos = fp.tell()
                idrs = struct.unpack(b'>%dH' % segcount, fp.read(2*segcount))
                for (i, (ec, sc, idd, idr)) in enumerate(zip(ecs, scs, idds, idrs)):
                    if idr:
                        # A range offset is relative to the position of
                        # its own entry in the idRangeOffset array.
                        fp.seek(pos + 2*i + idr)
                        for c in range(sc, ec+1):
                            (gid,) = struct.unpack(b'>H', fp.read(2))
                            if gid:
                                gid = (gid + idd) & 0xffff
                            char2gid[c] = gid
                    else:
                        for c in range(sc, ec+1):
                            char2gid[c] = (c + idd) & 0xffff
            else:
                # Unsupported subtable format: ignore it and keep whatever
                # the supported subtables provide.
                continue
        return char2gid
418
419
420##  Fonts
421##
422
class PDFFontError(PDFException):
    """Error category used by this module for font-related problems."""


class PDFUnicodeNotDefined(PDFFontError):
    """Raised by ``to_unichr`` when a character id has no Unicode mapping."""


LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
LITERAL_TYPE1C = LIT('Type1C')
428
429
class PDFFont:
    """Base class holding the metrics shared by all font kinds.

    Widths, ascent, descent and the bounding box are stored unscaled;
    the ``get_*`` / ``char_width`` accessors multiply them by ``hscale`` or
    ``vscale`` (both .001 unless a subclass changes them).
    """

    def __init__(self, descriptor, widths, default_width=None):
        """Initialise the metrics from a font descriptor.

        descriptor    -- mapping read with ``.get`` for FontName, Flags,
                         Ascent, Descent, ItalicAngle, MissingWidth,
                         Leading and FontBBox.
        widths        -- mapping from character id to unscaled width.
        default_width -- width for ids missing from *widths*; when None
                         the descriptor's MissingWidth (default 0) is used.
        """
        self.descriptor = descriptor
        self.widths = widths
        self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
        if isinstance(self.fontname, PSLiteral):
            self.fontname = literal_name(self.fontname)
        self.flags = int_value(descriptor.get('Flags', 0))
        self.ascent = num_value(descriptor.get('Ascent', 0))
        self.descent = num_value(descriptor.get('Descent', 0))
        self.italic_angle = num_value(descriptor.get('ItalicAngle', 0))
        # Test against None so that an explicit default width of 0 is
        # honoured instead of being replaced by MissingWidth.
        if default_width is None:
            default_width = num_value(descriptor.get('MissingWidth', 0))
        self.default_width = default_width
        self.leading = num_value(descriptor.get('Leading', 0))
        self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
        self.hscale = self.vscale = .001

    def __repr__(self):
        return '<PDFFont>'

    def is_vertical(self):
        """Return False: the base font is laid out horizontally."""
        return False

    def is_multibyte(self):
        """Return False: the base font uses single-byte codes."""
        return False

    def decode(self, s):
        """Return the character ids of *s*.

        A str is converted to a list of code points; anything else
        (expected to be bytes) is returned unchanged.
        """
        if isinstance(s, str):
            return list(map(ord, s))
        return s

    def get_ascent(self):
        """Return the scaled ascent."""
        return self.ascent * self.vscale

    def get_descent(self):
        """Return the scaled descent."""
        return self.descent * self.vscale

    def get_width(self):
        """Return the scaled bounding-box width.

        When the box has zero width the negated default width is used
        instead.
        """
        w = self.bbox[2]-self.bbox[0]
        if w == 0:
            w = -self.default_width
        return w * self.hscale

    def get_height(self):
        """Return the scaled bounding-box height.

        When the box has zero height, ``ascent - descent`` is used instead.
        """
        h = self.bbox[3]-self.bbox[1]
        if h == 0:
            h = self.ascent - self.descent
        return h * self.vscale

    def char_width(self, cid):
        """Return the scaled width of *cid*, or of the default width."""
        return self.widths.get(cid, self.default_width) * self.hscale

    def char_disp(self, cid):
        """Return the displacement of *cid*; always 0 in the base class."""
        return 0

    def string_width(self, s):
        """Return the sum of the scaled widths of the characters in *s*."""
        return sum(self.char_width(cid) for cid in self.decode(s))
486
487
class PDFSimpleFont(PDFFont):
    """Font whose character ids map to Unicode through an encoding table.

    ``cid2unicode`` comes from ``EncodingDB``; ``unicode_map`` is a
    ``FileUnicodeMap`` parsed from the ToUnicode stream when the font
    dictionary has one, otherwise None.
    """

    def __init__(self, descriptor, widths, spec):
        """Set up the encoding from the font dictionary *spec*.

        The font encoding is specified either by the name of a built-in
        encoding or by a dictionary describing the differences from a
        base encoding.  StandardEncoding is assumed when *spec* has no
        Encoding entry.
        """
        if 'Encoding' in spec:
            encoding = resolve1(spec['Encoding'])
        else:
            encoding = LITERAL_STANDARD_ENCODING
        if isinstance(encoding, dict):
            name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
            # Default to an empty list so list_value() is always handed a
            # list when the dictionary has no Differences entry.
            diff = list_value(encoding.get('Differences', []))
            self.cid2unicode = EncodingDB.get_encoding(name, diff)
        else:
            self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run()
        PDFFont.__init__(self, descriptor, widths)

    def to_unichr(self, cid):
        """Return the Unicode value for *cid*.

        The ToUnicode map is consulted first (when present), then the
        encoding table.  Raises PDFUnicodeNotDefined when neither knows
        the id.
        """
        if self.unicode_map:
            try:
                return self.unicode_map.get_unichr(cid)
            except KeyError:
                # Fall back to the encoding table below.
                pass
        try:
            return self.cid2unicode[cid]
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
521
class PDFType1Font(PDFSimpleFont):
    """Simple font built from a Type 1 font dictionary."""

    def __init__(self, rsrcmgr, spec):
        """Build the font from the font dictionary *spec*.

        Metrics are taken from ``FontMetricsDB`` when it knows the base
        font name; otherwise from the FontDescriptor, FirstChar and Widths
        entries of *spec*.  *rsrcmgr* is accepted but not used here.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            handle_error(PDFFontError, 'BaseFont is missing')
            self.basefont = 'unknown'
        try:
            (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
        except KeyError:
            descriptor = dict_value(spec.get('FontDescriptor', {}))
            firstchar = int_value(spec.get('FirstChar', 0))
            widths = list_value(spec.get('Widths', [0]*256))
            # Widths are listed consecutively starting at FirstChar.
            widths = dict((i+firstchar, w) for (i, w) in enumerate(widths))
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
        if 'Encoding' not in spec and 'FontFile' in descriptor:
            # try to recover the missing encoding info from the font file.
            self.fontfile = stream_value(descriptor.get('FontFile'))
            length1 = int_value(self.fontfile['Length1'])
            # Only the first Length1 bytes of the stream are parsed.
            data = self.fontfile.get_data()[:length1]
            parser = Type1FontHeaderParser(io.BytesIO(data))
            self.cid2unicode = parser.get_encoding()

    def __repr__(self):
        return '<PDFType1Font: basefont=%r>' % self.basefont
549
class PDFTrueTypeFont(PDFType1Font):
    """Font class that reuses PDFType1Font unchanged apart from its repr."""

    def __repr__(self):
        template = '<PDFTrueTypeFont: basefont=%r>'
        return template % self.basefont
554
class PDFType3Font(PDFSimpleFont):
    """Simple font built from a Type 3 font dictionary."""

    def __init__(self, rsrcmgr, spec):
        """Build the font from the font dictionary *spec*.

        Widths are listed consecutively starting at FirstChar.  When *spec*
        has no FontDescriptor, one is synthesised from its FontBBox.  The
        descent and ascent are taken from the bounding box, and the
        horizontal/vertical scales from FontMatrix.  *rsrcmgr* is accepted
        but not used here.
        """
        firstchar = int_value(spec.get('FirstChar', 0))
        widths = list_value(spec.get('Widths', [0]*256))
        widths = dict((i+firstchar, w) for (i, w) in enumerate(widths))
        if 'FontDescriptor' in spec:
            descriptor = dict_value(spec['FontDescriptor'])
        else:
            descriptor = {'Ascent':0, 'Descent':0,
                          'FontBBox':spec['FontBBox']}
        PDFSimpleFont.__init__(self, descriptor, widths, spec)
        self.matrix = tuple(list_value(spec.get('FontMatrix')))
        # The box is unpacked as (_, descent, _, ascent).
        (_, self.descent, _, self.ascent) = self.bbox
        (self.hscale, self.vscale) = apply_matrix_norm(self.matrix, (1,1))

    def __repr__(self):
        return '<PDFType3Font>'
574
575
class PDFCIDFont(PDFFont):
    """Font whose character ids are produced by a CMap.

    Attributes set by the constructor: ``basefont``, ``cidsysteminfo``,
    ``cidcoding`` ("Registry-Ordering"), ``cmap``, ``unicode_map`` (may be
    None), ``vertical``, ``disps`` and ``default_disp``; ``fontfile`` is
    set only when the descriptor has a FontFile2 entry.
    """

    def __init__(self, rsrcmgr, spec):
        """Build the font from the font dictionary *spec*.

        Missing BaseFont, Encoding or FontDescriptor entries, and an
        unknown CMap name, are reported through ``handle_error`` and
        replaced by fallbacks.  *rsrcmgr* is accepted but not used here.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            handle_error(PDFFontError, 'BaseFont is missing')
            self.basefont = 'unknown'

        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        registry = self.cidsysteminfo.get('Registry', 'unknown')
        ordering = self.cidsysteminfo.get('Ordering', 'unknown')
        self.cidcoding = '%s-%s' % (registry, ordering)

        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            handle_error(PDFFontError, 'Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as e:
            handle_error(PDFFontError, str(e))
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            handle_error(PDFFontError, 'FontDescriptor is missing')
            descriptor = {}

        # An embedded FontFile2 stream is opened as a TrueType font.
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            fontdata = io.BytesIO(self.fontfile.get_data())
            ttf = TrueTypeFont(self.basefont, fontdata)

        self._init_unicode_map(spec, ttf)

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get('W2', [])))
            self.disps = {cid: (vx, vy) for (cid, (_, (vx, vy))) in metrics.items()}
            (default_vy, default_width) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {cid: w for (cid, (w, _)) in metrics.items()}
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)

    def _init_unicode_map(self, spec, ttf):
        """Set ``self.unicode_map`` from the first source that applies.

        Order: the ToUnicode stream of *spec*; for the 'Adobe-Identity'
        coding, the cmap of the embedded TrueType font *ttf* (if any);
        otherwise the map ``CMapDB`` provides for ``self.cidcoding``.
        The attribute stays None when no source yields a map.
        """
        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, io.BytesIO(strm.get_data())).run()
            return
        if self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
            return
        try:
            self.unicode_map = CMapDB.get_unicode_map(self.cidcoding, self.cmap.is_vertical())
        except CMapDB.CMapNotFound:
            pass

    def __repr__(self):
        details = (self.basefont, self.cidcoding)
        return '<PDFCIDFont: basefont=%r, cidcoding=%r>' % details

    def is_vertical(self):
        """Return the writing mode recorded from the CMap at construction."""
        return self.vertical

    def is_multibyte(self):
        """Return True."""
        return True

    def decode(self, bytes):
        """Return the character ids the font's CMap decodes from *bytes*."""
        return self.cmap.decode(bytes)

    def char_disp(self, cid):
        "Returns an integer for horizontal fonts, a tuple for vertical fonts."
        return self.disps.get(cid, self.default_disp)

    def to_unichr(self, cid):
        """Return the Unicode value for *cid* from ``unicode_map``.

        Raises PDFUnicodeNotDefined when there is no map or the map does
        not know the id.
        """
        try:
            if self.unicode_map:
                return self.unicode_map.get_unichr(cid)
            raise KeyError(cid)
        except KeyError:
            raise PDFUnicodeNotDefined(self.cidcoding, cid)
664
665
def main(argv):
    """Open each file named in ``argv[1:]`` as a CFF font and print it.

    Returns None, so the process exit status is 0 when used with
    ``sys.exit``.
    """
    for fname in argv[1:]:
        # The context manager closes the file even if parsing raises.
        with io.open(fname, 'rb') as fp:
            # Use TrueTypeFont(fname, fp) here to inspect a TrueType file.
            font = CFFFont(fname, fp)
            print(font)

if __name__ == '__main__':
    sys.exit(main(sys.argv))
676