1#Copyright ReportLab Europe Ltd. 2000-2017 2#see license.txt for license details 3__version__ = '$Id$' 4__doc__="""TrueType font support 5 6This defines classes to represent TrueType fonts. They know how to calculate 7their own width and how to write themselves into PDF files. They support 8subsetting and embedding and can represent all 16-bit Unicode characters. 9 10Note on dynamic fonts 11--------------------- 12 13Usually a Font in ReportLab corresponds to a fixed set of PDF objects (Font, 14FontDescriptor, Encoding). But with dynamic font subsetting a single TTFont 15will result in a number of Font/FontDescriptor/Encoding object sets, and the 16contents of those will depend on the actual characters used for printing. 17 18To support dynamic font subsetting a concept of "dynamic font" was introduced. 19Dynamic Fonts have a _dynamicFont attribute set to 1. 20 21Dynamic fonts have the following additional functions:: 22 23 def splitString(self, text, doc): 24 '''Splits text into a number of chunks, each of which belongs to a 25 single subset. Returns a list of tuples (subset, string). Use 26 subset numbers with getSubsetInternalName. Doc is used to identify 27 a document so that different documents may have different dynamically 28 constructed subsets.''' 29 30 def getSubsetInternalName(self, subset, doc): 31 '''Returns the name of a PDF Font object corresponding to a given 32 subset of this dynamic font. Use this function instead of 33 PDFDocument.getInternalFontName.''' 34 35You must never call PDFDocument.getInternalFontName for dynamic fonts. 36 37If you have a traditional static font, mapping to PDF text output operators 38is simple:: 39 40 '%s 14 Tf (%s) Tj' % (getInternalFontName(psfontname), text) 41 42If you have a dynamic font, use this instead:: 43 44 for subset, chunk in font.splitString(text, doc): 45 '%s 14 Tf (%s) Tj' % (font.getSubsetInternalName(subset, doc), chunk) 46 47(Tf is a font setting operator and Tj is a text ouput operator. You should 48also escape invalid characters in Tj argument, see TextObject._formatText. 49Oh, and that 14 up there is font size.) 50 51Canvas and TextObject have special support for dynamic fonts. 52""" 53 54from struct import pack, unpack, error as structError 55from reportlab.lib.utils import getBytesIO, isPy3, bytestr, isUnicode, char2int, bytesT, isStr, isBytes 56from reportlab.pdfbase import pdfmetrics, pdfdoc 57from reportlab import rl_config, xrange, ascii 58from reportlab.lib.rl_accel import hex32, add32, calcChecksum, instanceStringWidthTTF 59from collections import namedtuple 60import os, time 61 62class TTFError(pdfdoc.PDFError): 63 "TrueType font exception" 64 pass 65 66if isPy3: 67 def SUBSETN(n,table=bytes.maketrans(b'0123456789',b'ABCDEFGIJK')): 68 return bytes('%6.6d'%n,'ASCII').translate(table) 69else: 70 import string 71 def SUBSETN(n,table=string.maketrans(b'0123456789',b'ABCDEFGIJK'),translate=string.translate): 72 return translate('%6.6d'%n,table) 73# 74# Helpers 75# 76def makeToUnicodeCMap(fontname, subset): 77 """Creates a ToUnicode CMap for a given subset. See Adobe 78 _PDF_Reference (ISBN 0-201-75839-3) for more information.""" 79 cmap = [ 80 "/CIDInit /ProcSet findresource begin", 81 "12 dict begin", 82 "begincmap", 83 "/CIDSystemInfo", 84 "<< /Registry (%s)" % fontname, 85 "/Ordering (%s)" % fontname, 86 "/Supplement 0", 87 ">> def", 88 "/CMapName /%s def" % fontname, 89 "/CMapType 2 def", 90 "1 begincodespacerange", 91 "<00> <%02X>" % (len(subset) - 1), 92 "endcodespacerange", 93 "%d beginbfchar" % len(subset) 94 ] + ["<%02X> <%04X>" % (i,v) for i,v in enumerate(subset)] + [ 95 "endbfchar", 96 "endcmap", 97 "CMapName currentdict /CMap defineresource pop", 98 "end", 99 "end" 100 ] 101 return '\n'.join(cmap) 102 103def splice(stream, offset, value): 104 """Splices the given value into stream at the given offset and 105 returns the resulting stream (the original is unchanged)""" 106 return stream[:offset] + value + stream[offset + len(value):] 107 108def _set_ushort(stream, offset, value): 109 """Writes the given unsigned short value into stream at the given 110 offset and returns the resulting stream (the original is unchanged)""" 111 return splice(stream, offset, pack(">H", value)) 112# 113# TrueType font handling 114# 115 116GF_ARG_1_AND_2_ARE_WORDS = 1 << 0 117GF_ARGS_ARE_XY_VALUES = 1 << 1 118GF_ROUND_XY_TO_GRID = 1 << 2 119GF_WE_HAVE_A_SCALE = 1 << 3 120GF_RESERVED = 1 << 4 121GF_MORE_COMPONENTS = 1 << 5 122GF_WE_HAVE_AN_X_AND_Y_SCALE = 1 << 6 123GF_WE_HAVE_A_TWO_BY_TWO = 1 << 7 124GF_WE_HAVE_INSTRUCTIONS = 1 << 8 125GF_USE_MY_METRICS = 1 << 9 126GF_OVERLAP_COMPOUND = 1 << 10 127GF_SCALED_COMPONENT_OFFSET = 1 << 11 128GF_UNSCALED_COMPONENT_OFFSET = 1 << 12 129 130 131_cached_ttf_dirs={} 132def _ttf_dirs(*roots): 133 R = _cached_ttf_dirs.get(roots,None) 134 if R is None: 135 join = os.path.join 136 realpath = os.path.realpath 137 R = [] 138 aR = R.append 139 for root in roots: 140 for r, d, f in os.walk(root,followlinks=True): 141 s = realpath(r) 142 if s not in R: aR(s) 143 for s in d: 144 s = realpath(join(r,s)) 145 if s not in R: aR(s) 146 _cached_ttf_dirs[roots] = R 147 return R 148 149def TTFOpenFile(fn): 150 '''Opens a TTF file possibly after searching TTFSearchPath 151 returns (filename,file) 152 ''' 153 from reportlab.lib.utils import rl_isfile, open_for_read 154 try: 155 f = open_for_read(fn,'rb') 156 return fn, f 157 except IOError: 158 import os 159 if not os.path.isabs(fn): 160 for D in _ttf_dirs(*rl_config.TTFSearchPath): 161 tfn = os.path.join(D,fn) 162 if rl_isfile(tfn): 163 f = open_for_read(tfn,'rb') 164 return tfn, f 165 raise TTFError('Can\'t open file "%s"' % fn) 166 167class TTFontParser: 168 "Basic TTF file parser" 169 ttfVersions = (0x00010000,0x74727565,0x74746366) 170 ttcVersions = (0x00010000,0x00020000) 171 fileKind='TTF' 172 173 def __init__(self, file, validate=0,subfontIndex=0): 174 """Loads and parses a TrueType font file. file can be a filename or a 175 file object. If validate is set to a false values, skips checksum 176 validation. This can save time, especially if the font is large. 177 """ 178 self.validate = validate 179 self.readFile(file) 180 isCollection = self.readHeader() 181 if isCollection: 182 self.readTTCHeader() 183 self.getSubfont(subfontIndex) 184 else: 185 if self.validate: self.checksumFile() 186 self.readTableDirectory() 187 self.subfontNameX = b'' 188 189 def readTTCHeader(self): 190 self.ttcVersion = self.read_ulong() 191 self.fileKind = 'TTC' 192 self.ttfVersions = self.ttfVersions[:-1] 193 if self.ttcVersion not in self.ttcVersions: 194 raise TTFError('"%s" is not a %s file: can\'t read version 0x%8.8x' %(self.filename,self.fileKind,self.ttcVersion)) 195 self.numSubfonts = self.read_ulong() 196 self.subfontOffsets = [] 197 a = self.subfontOffsets.append 198 for i in xrange(self.numSubfonts): 199 a(self.read_ulong()) 200 201 def getSubfont(self,subfontIndex): 202 if self.fileKind!='TTC': 203 raise TTFError('"%s" is not a TTC file: use this method' % (self.filename,self.fileKind)) 204 try: 205 pos = self.subfontOffsets[subfontIndex] 206 except IndexError: 207 raise TTFError('TTC file "%s": bad subfontIndex %s not in [0,%d]' % (self.filename,subfontIndex,self.numSubfonts-1)) 208 self.seek(pos) 209 self.readHeader() 210 self.readTableDirectory() 211 self.subfontNameX = bytestr('-'+str(subfontIndex)) 212 213 def readTableDirectory(self): 214 try: 215 self.numTables = self.read_ushort() 216 self.searchRange = self.read_ushort() 217 self.entrySelector = self.read_ushort() 218 self.rangeShift = self.read_ushort() 219 220 # Read table directory 221 self.table = {} 222 self.tables = [] 223 for n in xrange(self.numTables): 224 record = {} 225 record['tag'] = self.read_tag() 226 record['checksum'] = self.read_ulong() 227 record['offset'] = self.read_ulong() 228 record['length'] = self.read_ulong() 229 self.tables.append(record) 230 self.table[record['tag']] = record 231 except: 232 raise TTFError('Corrupt %s file "%s" cannot read Table Directory' % (self.fileKind, self.filename)) 233 if self.validate: self.checksumTables() 234 235 def readHeader(self): 236 '''read the sfnt header at the current position''' 237 try: 238 self.version = version = self.read_ulong() 239 except: 240 raise TTFError('"%s" is not a %s file: can\'t read version' %(self.filename,self.fileKind)) 241 242 if version==0x4F54544F: 243 raise TTFError('%s file "%s": postscript outlines are not supported'%(self.fileKind,self.filename)) 244 245 if version not in self.ttfVersions: 246 raise TTFError('Not a recognized TrueType font: version=0x%8.8X' % version) 247 return version==self.ttfVersions[-1] 248 249 def readFile(self,f): 250 if not hasattr(self,'_ttf_data'): 251 if hasattr(f,'read'): 252 self.filename = getattr(f,'name','(ttf)') #good idea Marius 253 self._ttf_data = f.read() 254 else: 255 self.filename, f = TTFOpenFile(f) 256 self._ttf_data = f.read() 257 f.close() 258 self._pos = 0 259 260 def checksumTables(self): 261 # Check the checksums for all tables 262 for t in self.tables: 263 table = self.get_chunk(t['offset'], t['length']) 264 checksum = calcChecksum(table) 265 if t['tag'] == 'head': 266 adjustment = unpack('>l', table[8:8+4])[0] 267 checksum = add32(checksum, -adjustment) 268 xchecksum = t['checksum'] 269 if xchecksum != checksum: 270 raise TTFError('TTF file "%s": invalid checksum %s table: %s (expected %s)' % (self.filename,hex32(checksum),t['tag'],hex32(xchecksum))) 271 272 def checksumFile(self): 273 # Check the checksums for the whole file 274 checksum = calcChecksum(self._ttf_data) 275 if 0xB1B0AFBA!=checksum: 276 raise TTFError('TTF file "%s": invalid checksum %s (expected 0xB1B0AFBA) len: %d &3: %d' % (self.filename,hex32(checksum),len(self._ttf_data),(len(self._ttf_data)&3))) 277 278 def get_table_pos(self, tag): 279 "Returns the offset and size of a given TTF table." 280 offset = self.table[tag]['offset'] 281 length = self.table[tag]['length'] 282 return (offset, length) 283 284 def seek(self, pos): 285 "Moves read pointer to a given offset in file." 286 self._pos = pos 287 288 def skip(self, delta): 289 "Skip the given number of bytes." 290 self._pos = self._pos + delta 291 292 def seek_table(self, tag, offset_in_table = 0): 293 """Moves read pointer to the given offset within a given table and 294 returns absolute offset of that position in the file.""" 295 self._pos = self.get_table_pos(tag)[0] + offset_in_table 296 return self._pos 297 298 if isPy3: 299 def read_tag(self): 300 "Read a 4-character tag" 301 self._pos += 4 302 return str(self._ttf_data[self._pos - 4:self._pos],'utf8') 303 304 def get_chunk(self, pos, length): 305 "Return a chunk of raw data at given position" 306 return bytes(self._ttf_data[pos:pos+length]) 307 308 def read_uint8(self): 309 self._pos += 1 310 return int(self._ttf_data[self._pos-1]) 311 else: 312 def read_tag(self): 313 "Read a 4-character tag" 314 self._pos += 4 315 return self._ttf_data[self._pos - 4:self._pos] 316 317 def get_chunk(self, pos, length): 318 "Return a chunk of raw data at given position" 319 return self._ttf_data[pos:pos+length] 320 321 def read_uint8(self): 322 self._pos += 1 323 return ord(self._ttf_data[self._pos-1]) 324 325 def read_ushort(self): 326 "Reads an unsigned short" 327 self._pos += 2 328 return unpack('>H',self._ttf_data[self._pos-2:self._pos])[0] 329 330 def read_ulong(self): 331 "Reads an unsigned long" 332 self._pos += 4 333 return unpack('>L',self._ttf_data[self._pos - 4:self._pos])[0] 334 335 def read_short(self): 336 "Reads a signed short" 337 self._pos += 2 338 try: 339 return unpack('>h',self._ttf_data[self._pos-2:self._pos])[0] 340 except structError as error: 341 raise TTFError(error) 342 343 def get_ushort(self, pos): 344 "Return an unsigned short at given position" 345 return unpack('>H',self._ttf_data[pos:pos+2])[0] 346 347 def get_ulong(self, pos): 348 "Return an unsigned long at given position" 349 return unpack('>L',self._ttf_data[pos:pos+4])[0] 350 351 def get_table(self, tag): 352 "Return the given TTF table" 353 pos, length = self.get_table_pos(tag) 354 return self._ttf_data[pos:pos+length] 355 356class TTFontMaker: 357 "Basic TTF file generator" 358 359 def __init__(self): 360 "Initializes the generator." 361 self.tables = {} 362 363 def add(self, tag, data): 364 "Adds a table to the TTF file." 365 if tag == 'head': 366 data = splice(data, 8, b'\0\0\0\0') 367 self.tables[tag] = data 368 369 def makeStream(self): 370 "Finishes the generation and returns the TTF file as a string" 371 stm = getBytesIO() 372 write = stm.write 373 374 tables = self.tables 375 numTables = len(tables) 376 searchRange = 1 377 entrySelector = 0 378 while searchRange * 2 <= numTables: 379 searchRange = searchRange * 2 380 entrySelector = entrySelector + 1 381 searchRange = searchRange * 16 382 rangeShift = numTables * 16 - searchRange 383 384 # Header 385 write(pack(">lHHHH", 0x00010000, numTables, searchRange, 386 entrySelector, rangeShift)) 387 388 # Table directory 389 offset = 12 + numTables * 16 390 wStr = (lambda x:write(bytes(tag,'latin1'))) if isPy3 else write 391 tables_items = list(sorted(tables.items())) 392 for tag, data in tables_items: 393 if tag == 'head': 394 head_start = offset 395 checksum = calcChecksum(data) 396 wStr(tag) 397 write(pack(">LLL", checksum, offset, len(data))) 398 paddedLength = (len(data)+3)&~3 399 offset = offset + paddedLength 400 401 # Table data 402 for tag, data in tables_items: 403 data += b"\0\0\0" 404 write(data[:len(data)&~3]) 405 406 checksum = calcChecksum(stm.getvalue()) 407 checksum = add32(0xB1B0AFBA, -checksum) 408 stm.seek(head_start + 8) 409 write(pack('>L', checksum)) 410 411 return stm.getvalue() 412 413#this is used in the cmap encoding fmt==2 case 414CMapFmt2SubHeader = namedtuple('CMapFmt2SubHeader', 'firstCode entryCount idDelta idRangeOffset') 415 416class TTFNameBytes(bytesT): 417 '''class used to return named strings''' 418 def __new__(cls,b,enc='utf8'): 419 try: 420 ustr = b.decode(enc) 421 except: 422 ustr = b.decode('latin1') 423 self = bytesT.__new__(cls,ustr.encode('utf8')) 424 self.ustr = ustr 425 return self 426 427class TTFontFile(TTFontParser): 428 "TTF file parser and generator" 429 _agfnc = 0 430 _agfnm = {} 431 432 def __init__(self, file, charInfo=1, validate=0,subfontIndex=0): 433 """Loads and parses a TrueType font file. 434 435 file can be a filename or a file object. If validate is set to a false 436 values, skips checksum validation. This can save time, especially if 437 the font is large. See TTFontFile.extractInfo for more information. 438 """ 439 if isStr(subfontIndex): #bytes or unicode 440 sfi = 0 441 __dict__ = self.__dict__.copy() 442 while True: 443 TTFontParser.__init__(self, file, validate=validate,subfontIndex=sfi) 444 numSubfonts = self.numSubfonts = self.read_ulong() 445 self.extractInfo(charInfo) 446 if (isBytes(subfontIndex) and subfontIndex==self.name 447 or subfontIndex==self.name.ustr): #we found it 448 return 449 if not sfi: 450 __dict__.update(dict(_ttf_data=self._ttf_data, filename=self.filename)) 451 sfi += 1 452 if sfi>=numSubfonts: 453 raise ValueError('cannot find %r subfont %r' % (self.filename, subfontIndex)) 454 self.__dict__.clear() 455 self.__dict__.update(__dict__) 456 else: 457 TTFontParser.__init__(self, file, validate=validate,subfontIndex=subfontIndex) 458 self.extractInfo(charInfo) 459 460 def extractInfo(self, charInfo=1): 461 """ 462 Extract typographic information from the loaded font file. 463 464 The following attributes will be set:: 465 466 name PostScript font name 467 flags Font flags 468 ascent Typographic ascender in 1/1000ths of a point 469 descent Typographic descender in 1/1000ths of a point 470 capHeight Cap height in 1/1000ths of a point (0 if not available) 471 bbox Glyph bounding box [l,t,r,b] in 1/1000ths of a point 472 _bbox Glyph bounding box [l,t,r,b] in unitsPerEm 473 unitsPerEm Glyph units per em 474 italicAngle Italic angle in degrees ccw 475 stemV stem weight in 1/1000ths of a point (approximate) 476 477 If charInfo is true, the following will also be set:: 478 479 defaultWidth default glyph width in 1/1000ths of a point 480 charWidths dictionary of character widths for every supported UCS character 481 code 482 483 This will only work if the font has a Unicode cmap (platform 3, 484 encoding 1, format 4 or platform 0 any encoding format 4). Setting 485 charInfo to false avoids this requirement 486 487 """ 488 # name - Naming table 489 name_offset = self.seek_table("name") 490 format = self.read_ushort() 491 if format != 0: 492 raise TTFError("Unknown name table format (%d)" % format) 493 numRecords = self.read_ushort() 494 string_data_offset = name_offset + self.read_ushort() 495 names = {1:None,2:None,3:None,4:None,6:None} 496 K = list(names.keys()) 497 nameCount = len(names) 498 for i in xrange(numRecords): 499 platformId = self.read_ushort() 500 encodingId = self.read_ushort() 501 languageId = self.read_ushort() 502 nameId = self.read_ushort() 503 length = self.read_ushort() 504 offset = self.read_ushort() 505 if nameId not in K: continue 506 N = None 507 if platformId == 3 and encodingId == 1 and languageId == 0x409: # Microsoft, Unicode, US English, PS Name 508 opos = self._pos 509 try: 510 self.seek(string_data_offset + offset) 511 if length % 2 != 0: 512 raise TTFError("PostScript name is UTF-16BE string of odd length") 513 N = TTFNameBytes(self.get_chunk(string_data_offset + offset, length),'utf_16_be') 514 finally: 515 self._pos = opos 516 elif platformId == 1 and encodingId == 0 and languageId == 0: # Macintosh, Roman, English, PS Name 517 # According to OpenType spec, if PS name exists, it must exist 518 # both in MS Unicode and Macintosh Roman formats. Apparently, 519 # you can find live TTF fonts which only have Macintosh format. 520 N = TTFNameBytes(self.get_chunk(string_data_offset + offset, length),'mac_roman') 521 if N and names[nameId]==None: 522 names[nameId] = N 523 nameCount -= 1 524 if nameCount==0: break 525 if names[6] is not None: 526 psName = names[6] 527 elif names[4] is not None: 528 psName = names[4] 529 # Fine, one last try before we bail. 530 elif names[1] is not None: 531 psName = names[1] 532 else: 533 psName = None 534 535 # Don't just assume, check for None since some shoddy fonts cause crashes here... 536 if not psName: 537 if rl_config.autoGenerateTTFMissingTTFName: 538 fn = self.filename 539 if fn: 540 bfn = os.path.splitext(os.path.basename(fn))[0] 541 if not fn: 542 psName = bytestr('_RL_%s_%s_TTF' % (time.time(), self.__class__._agfnc)) 543 self.__class__._agfnc += 1 544 else: 545 psName = self._agfnm.get(fn,'') 546 if not psName: 547 if bfn: 548 psName = bytestr('_RL_%s_TTF' % bfn) 549 else: 550 psName = bytestr('_RL_%s_%s_TTF' % (time.time(), self.__class__._agfnc)) 551 self.__class__._agfnc += 1 552 self._agfnm[fn] = psName 553 else: 554 raise TTFError("Could not find PostScript font name") 555 556 psName = psName.__class__(psName.replace(b" ", b"-")) #Dinu Gherman's fix for font names with spaces 557 558 for c in psName: 559 if char2int(c)>126 or c in b' [](){}<>/%': 560 raise TTFError("psName=%r contains invalid character %s" % (psName,ascii(c))) 561 self.name = psName 562 self.familyName = names[1] or psName 563 self.styleName = names[2] or 'Regular' 564 self.fullName = names[4] or psName 565 self.uniqueFontID = names[3] or psName 566 567 # head - Font header table 568 try: 569 self.seek_table("head") 570 except: 571 raise TTFError('head table not found ttf name=%s' % self.name) 572 ver_maj, ver_min = self.read_ushort(), self.read_ushort() 573 if ver_maj != 1: 574 raise TTFError('Unknown head table version %d.%04x' % (ver_maj, ver_min)) 575 self.fontRevision = self.read_ushort(), self.read_ushort() 576 577 self.skip(4) 578 magic = self.read_ulong() 579 if magic != 0x5F0F3CF5: 580 raise TTFError('Invalid head table magic %04x' % magic) 581 self.skip(2) 582 self.unitsPerEm = unitsPerEm = self.read_ushort() 583 scale = lambda x, unitsPerEm=unitsPerEm: x * 1000. / unitsPerEm 584 self.skip(16) 585 xMin = self.read_short() 586 yMin = self.read_short() 587 xMax = self.read_short() 588 yMax = self.read_short() 589 self.bbox = list(map(scale, [xMin, yMin, xMax, yMax])) 590 self.skip(3*2) 591 indexToLocFormat = self.read_ushort() 592 glyphDataFormat = self.read_ushort() 593 594 # OS/2 - OS/2 and Windows metrics table 595 # (needs data from head table) 596 subsettingAllowed = True 597 if "OS/2" in self.table: 598 self.seek_table("OS/2") 599 version = self.read_ushort() 600 self.skip(2) 601 usWeightClass = self.read_ushort() 602 self.skip(2) 603 fsType = self.read_ushort() 604 if fsType==0x0002 or (fsType & 0x0300): 605 subsettingAllowed = os.path.basename(self.filename) not in rl_config.allowTTFSubsetting 606 self.skip(58) #11*2 + 10 + 4*4 + 4 + 3*2 607 sTypoAscender = self.read_short() 608 sTypoDescender = self.read_short() 609 self.ascent = scale(sTypoAscender) # XXX: for some reason it needs to be multiplied by 1.24--1.28 610 self.descent = scale(sTypoDescender) 611 612 if version > 1: 613 self.skip(16) #3*2 + 2*4 + 2 614 sCapHeight = self.read_short() 615 self.capHeight = scale(sCapHeight) 616 else: 617 self.capHeight = self.ascent 618 else: 619 # Microsoft TTFs require an OS/2 table; Apple ones do not. Try to 620 # cope. The data is not very important anyway. 621 usWeightClass = 500 622 self.ascent = scale(yMax) 623 self.descent = scale(yMin) 624 self.capHeight = self.ascent 625 626 # There's no way to get stemV from a TTF file short of analyzing actual outline data 627 # This fuzzy formula is taken from pdflib sources, but we could just use 0 here 628 self.stemV = 50 + int((usWeightClass / 65.0) ** 2) 629 630 # post - PostScript table 631 # (needs data from OS/2 table) 632 self.seek_table("post") 633 ver_maj, ver_min = self.read_ushort(), self.read_ushort() 634 if ver_maj not in (1, 2, 3, 4): 635 # Adobe/MS documents 1, 2, 2.5, 3; Apple also has 4. 636 # From Apple docs it seems that we do not need to care 637 # about the exact version, so if you get this error, you can 638 # try to remove this check altogether. 639 raise TTFError('Unknown post table version %d.%04x' % (ver_maj, ver_min)) 640 self.italicAngle = self.read_short() + self.read_ushort() / 65536.0 641 self.underlinePosition = self.read_short() 642 self.underlineThickness = self.read_short() 643 isFixedPitch = self.read_ulong() 644 645 self.flags = FF_SYMBOLIC # All fonts that contain characters 646 # outside the original Adobe character 647 # set are considered "symbolic". 648 if self.italicAngle!= 0: 649 self.flags = self.flags | FF_ITALIC 650 if usWeightClass >= 600: # FW_REGULAR == 500, FW_SEMIBOLD == 600 651 self.flags = self.flags | FF_FORCEBOLD 652 if isFixedPitch: 653 self.flags = self.flags | FF_FIXED 654 # XXX: FF_SERIF? FF_SCRIPT? FF_ALLCAP? FF_SMALLCAP? 655 656 # hhea - Horizontal header table 657 self.seek_table("hhea") 658 ver_maj, ver_min = self.read_ushort(), self.read_ushort() 659 if ver_maj != 1: 660 raise TTFError('Unknown hhea table version %d.%04x' % (ver_maj, ver_min)) 661 self.skip(28) 662 metricDataFormat = self.read_ushort() 663 if metricDataFormat != 0: 664 raise TTFError('Unknown horizontal metric data format (%d)' % metricDataFormat) 665 numberOfHMetrics = self.read_ushort() 666 if numberOfHMetrics == 0: 667 raise TTFError('Number of horizontal metrics is 0') 668 669 # maxp - Maximum profile table 670 self.seek_table("maxp") 671 ver_maj, ver_min = self.read_ushort(), self.read_ushort() 672 if ver_maj != 1: 673 raise TTFError('Unknown maxp table version %d.%04x' % (ver_maj, ver_min)) 674 self.numGlyphs = numGlyphs = self.read_ushort() 675 if not subsettingAllowed: 676 if self.numGlyphs>0xFF: 677 raise TTFError('Font does not allow subsetting/embedding (%04X)' % fsType) 678 else: 679 self._full_font = True 680 else: 681 self._full_font = False 682 683 if not charInfo: 684 self.charToGlyph = None 685 self.defaultWidth = None 686 self.charWidths = None 687 return 688 689 if glyphDataFormat != 0: 690 raise TTFError('Unknown glyph data format (%d)' % glyphDataFormat) 691 692 # cmap - Character to glyph index mapping table 693 cmap_offset = self.seek_table("cmap") 694 cmapVersion = self.read_ushort() 695 cmapTableCount = self.read_ushort() 696 if cmapTableCount==0 and cmapVersion!=0: 697 cmapTableCount, cmapVersion = cmapVersion, cmapTableCount 698 encoffs = None 699 enc = 0 700 for n in xrange(cmapTableCount): 701 platform = self.read_ushort() 702 encoding = self.read_ushort() 703 offset = self.read_ulong() 704 if platform==3: 705 enc = 1 706 encoffs = offset 707 elif platform==1 and encoding==0 and enc!=1: 708 enc = 2 709 encoffs = offset 710 elif platform==1 and encoding==1: 711 enc = 1 712 encoffs = offset 713 elif platform==0 and encoding!=5: 714 enc = 1 715 encoffs = offset 716 if encoffs is None: 717 raise TTFError('could not find a suitable cmap encoding') 718 encoffs += cmap_offset 719 self.seek(encoffs) 720 fmt = self.read_ushort() 721 self.charToGlyph = charToGlyph = {} 722 glyphToChar = {} 723 if fmt in (13,12,10,8): 724 self.skip(2) #padding 725 length = self.read_ulong() 726 lang = self.read_ulong() 727 else: 728 length = self.read_ushort() 729 lang = self.read_ushort() 730 if fmt==0: 731 T = [self.read_uint8() for i in xrange(length-6)] 732 for unichar in xrange(min(256,self.numGlyphs,len(table))): 733 glyph = T[glyph] 734 charToGlyph[unichar] = glyph 735 glyphToChar.setdefault(glyph,[]).append(unichar) 736 elif fmt==4: 737 limit = encoffs + length 738 segCount = int(self.read_ushort() / 2.0) 739 self.skip(6) 740 endCount = [self.read_ushort() for _ in xrange(segCount)] 741 self.skip(2) 742 startCount = [self.read_ushort() for _ in xrange(segCount)] 743 idDelta = [self.read_short() for _ in xrange(segCount)] 744 idRangeOffset_start = self._pos 745 idRangeOffset = [self.read_ushort() for _ in xrange(segCount)] 746 747 # Now it gets tricky. 748 for n in xrange(segCount): 749 for unichar in xrange(startCount[n], endCount[n] + 1): 750 if idRangeOffset[n] == 0: 751 glyph = (unichar + idDelta[n]) & 0xFFFF 752 else: 753 offset = (unichar - startCount[n]) * 2 + idRangeOffset[n] 754 offset = idRangeOffset_start + 2 * n + offset 755 if offset >= limit: 756 # workaround for broken fonts (like Thryomanes) 757 glyph = 0 758 else: 759 glyph = self.get_ushort(offset) 760 if glyph != 0: 761 glyph = (glyph + idDelta[n]) & 0xFFFF 762 charToGlyph[unichar] = glyph 763 glyphToChar.setdefault(glyph,[]).append(unichar) 764 elif fmt==6: 765 first = self.read_ushort() 766 count = self.read_ushort() 767 for glyph in xrange(first,first+count): 768 unichar = self.read_ushort() 769 charToGlyph[unichar] = glyph 770 glyphToChar.setdefault(glyph,[]).append(unichar) 771 elif fmt==10: 772 first = self.read_ulong() 773 count = self.read_ulong() 774 for glyph in xrange(first,first+count): 775 unichar = self.read_ushort() 776 charToGlyph[unichar] = glyph 777 glyphToChar.setdefault(glyph,[]).append(unichar) 778 elif fmt==12: 779 segCount = self.read_ulong() 780 for n in xrange(segCount): 781 start = self.read_ulong() 782 end = self.read_ulong() 783 inc = self.read_ulong() - start 784 for unichar in xrange(start,end+1): 785 glyph = unichar + inc 786 charToGlyph[unichar] = glyph 787 glyphToChar.setdefault(glyph,[]).append(unichar) 788 elif fmt==13: 789 segCount = self.read_ulong() 790 for n in xrange(segCount): 791 start = self.read_ulong() 792 end = self.read_ulong() 793 gid = self.read_ulong() 794 for unichar in xrange(start,end+1): 795 charToGlyph[unichar] = gid 796 glyphToChar.setdefault(gid,[]).append(unichar) 797 elif fmt==2: 798 T = [self.read_ushort() for i in xrange(256)] #subheader keys 799 maxSHK = max(T) 800 SH = [] 801 for i in xrange(maxSHK+1): 802 firstCode = self.read_ushort() 803 entryCount = self.read_ushort() 804 idDelta = self.read_ushort() 805 idRangeOffset = (self.read_ushort()-(maxSHK-i)*8-2)>>1 806 SH.append(CMapFmt2SubHeader(firstCode,entryCount,idDelta,idRangeOffset)) 807 #number of glyph indexes to read. it is the length of the entire subtable minus that bit we've read so far 808 entryCount = (length-(self._pos-(cmap_offset+encoffs)))>>1 809 glyphs = [self.read_short() for i in xrange(entryCount)] 810 last = -1 811 for unichar in xrange(256): 812 if T[unichar]==0: 813 #Special case, single byte encoding entry, look unichar up in subhead 814 if last!=-1: 815 glyph = 0 816 elif (unichar<SH[0].firstCode or unichar>=SH[0].firstCode+SH[0].entryCount or 817 SH[0].idRangeOffset+(unichar-SH[0].firstCode)>=entryCount): 818 glyph = 0 819 else: 820 glyph = glyphs[SH[0].idRangeOffset+(unichar-SH[0].firstCode)] 821 if glyph!=0: 822 glyph += SH[0].idDelta 823 #assume the single byte codes are ascii 824 if glyph!=0 and glyph<self.numGlyphs: 825 charToGlyph[unichar] = glyph 826 glyphToChar.setdefault(glyph,[]).append(unichar) 827 else: 828 k = T[unichar] 829 for j in xrange(SH[k].entryCount): 830 if SH[k].idRangeOffset+j>=entryCount: 831 glyph = 0 832 else: 833 glyph = glyphs[SH[k].idRangeOffset+j] 834 if glyph!= 0: 835 glyph += SH[k].idDelta 836 if glyph!=0 and glyph<self.numGlyphs: 837 enc = (unichar<<8)|(j+SH[k].firstCode) 838 charToGlyph[enc] = glyph 839 glyphToChar.setdefault(glyph,[]).append(enc) 840 if last==-1: 841 last = unichar 842 else: 843 raise ValueError('Unsupported cmap encoding format %d' % fmt) 844 845 # hmtx - Horizontal metrics table 846 # (needs data from hhea, maxp, and cmap tables) 847 self.seek_table("hmtx") 848 aw = None 849 self.charWidths = charWidths = {} 850 self.hmetrics = [] 851 for glyph in xrange(numberOfHMetrics): 852 # advance width and left side bearing. lsb is actually signed 853 # short, but we don't need it anyway (except for subsetting) 854 aw, lsb = self.read_ushort(), self.read_ushort() 855 self.hmetrics.append((aw, lsb)) 856 aw = scale(aw) 857 if glyph == 0: 858 self.defaultWidth = aw 859 if glyph in glyphToChar: 860 for char in glyphToChar[glyph]: 861 charWidths[char] = aw 862 for glyph in xrange(numberOfHMetrics, numGlyphs): 863 # the rest of the table only lists advance left side bearings. 864 # so we reuse aw set by the last iteration of the previous loop 865 lsb = self.read_ushort() 866 self.hmetrics.append((aw, lsb)) 867 if glyph in glyphToChar: 868 for char in glyphToChar[glyph]: 869 charWidths[char] = aw 870 871 # loca - Index to location 872 if 'loca' not in self.table: raise TTFError('missing location table') 873 self.seek_table('loca') 874 self.glyphPos = [] 875 if indexToLocFormat == 0: 876 for n in xrange(numGlyphs + 1): 877 self.glyphPos.append(self.read_ushort() << 1) 878 elif indexToLocFormat == 1: 879 for n in xrange(numGlyphs + 1): 880 self.glyphPos.append(self.read_ulong()) 881 else: 882 raise TTFError('Unknown location table format (%d)' % indexToLocFormat) 883 if 0x20 in charToGlyph: 884 charToGlyph[0xa0] = charToGlyph[0x20] 885 charWidths[0xa0] = charWidths[0x20] 886 elif 0xa0 in charToGlyph: 887 charToGlyph[0x20] = charToGlyph[0xa0] 888 charWidths[0x20] = charWidths[0xa0] 889 890 # Subsetting 891 892 def makeSubset(self, subset): 893 """Create a subset of a TrueType font""" 894 output = TTFontMaker() 895 896 # Build a mapping of glyphs in the subset to glyph numbers in 897 # the original font. Also build a mapping of UCS codes to 898 # glyph values in the new font. 899 900 # Start with 0 -> 0: "missing character" 901 glyphMap = [0] # new glyph index -> old glyph index 902 glyphSet = {0:0} # old glyph index -> new glyph index 903 codeToGlyph = {} # unicode -> new glyph index 904 for code in subset: 905 if code in self.charToGlyph: 906 originalGlyphIdx = self.charToGlyph[code] 907 else: 908 originalGlyphIdx = 0 909 if originalGlyphIdx not in glyphSet: 910 glyphSet[originalGlyphIdx] = len(glyphMap) 911 glyphMap.append(originalGlyphIdx) 912 codeToGlyph[code] = glyphSet[originalGlyphIdx] 913 914 # Also include glyphs that are parts of composite glyphs 915 start = self.get_table_pos('glyf')[0] 916 n = 0 917 while n < len(glyphMap): 918 originalGlyphIdx = glyphMap[n] 919 glyphPos = self.glyphPos[originalGlyphIdx] 920 glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos 921 n += 1 922 if not glyphLen: continue 923 self.seek(start + glyphPos) 924 numberOfContours = self.read_short() 925 if numberOfContours < 0: 926 # composite glyph 927 self.skip(8) 928 flags = GF_MORE_COMPONENTS 929 while flags & GF_MORE_COMPONENTS: 930 flags = self.read_ushort() 931 glyphIdx = self.read_ushort() 932 if glyphIdx not in glyphSet: 933 glyphSet[glyphIdx] = len(glyphMap) 934 glyphMap.append(glyphIdx) 935 if flags & GF_ARG_1_AND_2_ARE_WORDS: 936 self.skip(4) 937 else: 938 self.skip(2) 939 if flags & GF_WE_HAVE_A_SCALE: 940 self.skip(2) 941 elif flags & GF_WE_HAVE_AN_X_AND_Y_SCALE: 942 self.skip(4) 943 elif flags & GF_WE_HAVE_A_TWO_BY_TWO: 944 self.skip(8) 945 946 947 # The following tables are simply copied from the original 948 for tag in ('name', 'OS/2', 'cvt ', 'fpgm', 'prep'): 949 try: 950 output.add(tag, self.get_table(tag)) 951 except KeyError: 952 # Apparently some of the tables are optional (cvt, fpgm, prep). 953 # The lack of the required ones (name, OS/2) would have already 954 # been caught before. 955 pass 956 957 # post - PostScript 958 post = b"\x00\x03\x00\x00" + self.get_table('post')[4:16] + b"\x00" * 16 959 output.add('post', post) 960 961 numGlyphs = len(glyphMap) 962 963 # hmtx - Horizontal Metrics 964 hmtx = [] 965 for n in xrange(numGlyphs): 966 aw, lsb = self.hmetrics[glyphMap[n]] 967 hmtx.append(int(aw)) 968 hmtx.append(int(lsb)) 969 970 #work out n as 0 or first aw that's the start of a run 971 n = len(hmtx)-2 972 while n and hmtx[n]==hmtx[n-2]: 973 n -= 2 974 if not n: n = 2 #need at least one pair 975 numberOfHMetrics = n>>1 #number of full H Metric pairs 976 hmtx = hmtx[:n] + hmtx[n+1::2] #full pairs + all the trailing lsb's 977 978 hmtx = pack(*([">%dH" % len(hmtx)] + hmtx)) 979 output.add('hmtx', hmtx) 980 981 # hhea - Horizontal Header 982 hhea = self.get_table('hhea') 983 hhea = _set_ushort(hhea, 34, numberOfHMetrics) 984 output.add('hhea', hhea) 985 986 # maxp - Maximum Profile 987 maxp = self.get_table('maxp') 988 maxp = _set_ushort(maxp, 4, numGlyphs) 989 output.add('maxp', maxp) 990 991 # cmap - Character to glyph mapping 992 # XXX maybe use format 0 if possible, not 6? 993 entryCount = len(subset) 994 length = 10 + entryCount * 2 995 cmap = [0, 1, # version, number of tables 996 1, 0, 0,12, # platform, encoding, offset (hi,lo) 997 6, length, 0, # format, length, language 998 0, 999 entryCount] + \ 1000 list(map(codeToGlyph.get, subset)) 1001 cmap = pack(*([">%dH" % len(cmap)] + cmap)) 1002 output.add('cmap', cmap) 1003 1004 # glyf - Glyph data 1005 glyphData = self.get_table('glyf') 1006 offsets = [] 1007 glyf = [] 1008 pos = 0 1009 for n in xrange(numGlyphs): 1010 offsets.append(pos) 1011 originalGlyphIdx = glyphMap[n] 1012 glyphPos = self.glyphPos[originalGlyphIdx] 1013 glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos 1014 data = glyphData[glyphPos:glyphPos+glyphLen] 1015 # Fix references in composite glyphs 1016 if glyphLen > 2 and unpack(">h", data[:2])[0] < 0: 1017 # composite glyph 1018 pos_in_glyph = 10 1019 flags = GF_MORE_COMPONENTS 1020 while flags & GF_MORE_COMPONENTS: 1021 flags = unpack(">H", data[pos_in_glyph:pos_in_glyph+2])[0] 1022 glyphIdx = unpack(">H", data[pos_in_glyph+2:pos_in_glyph+4])[0] 1023 data = _set_ushort(data, pos_in_glyph + 2, glyphSet[glyphIdx]) 1024 pos_in_glyph = pos_in_glyph + 4 1025 if flags & GF_ARG_1_AND_2_ARE_WORDS: 1026 pos_in_glyph = pos_in_glyph + 4 1027 else: 1028 pos_in_glyph = pos_in_glyph + 2 1029 if flags & GF_WE_HAVE_A_SCALE: 1030 pos_in_glyph = pos_in_glyph + 2 1031 elif flags & GF_WE_HAVE_AN_X_AND_Y_SCALE: 1032 pos_in_glyph = pos_in_glyph + 4 1033 elif flags & GF_WE_HAVE_A_TWO_BY_TWO: 1034 pos_in_glyph = pos_in_glyph + 8 1035 glyf.append(data) 1036 pos = pos + glyphLen 1037 if pos % 4 != 0: 1038 padding = 4 - pos % 4 1039 glyf.append(b'\0' * padding) 1040 pos = pos + padding 1041 offsets.append(pos) 1042 output.add('glyf', b''.join(glyf)) 1043 1044 # loca - Index to location 1045 loca = [] 1046 if (pos + 1) >> 1 > 0xFFFF: 1047 indexToLocFormat = 1 # long format 1048 for offset in offsets: 1049 loca.append(offset) 1050 loca = pack(*([">%dL" % len(loca)] + loca)) 1051 else: 1052 indexToLocFormat = 0 # short format 1053 for offset in offsets: 1054 loca.append(offset >> 1) 1055 loca = pack(*([">%dH" % len(loca)] + loca)) 1056 output.add('loca', loca) 1057 1058 # head - Font header 1059 head = self.get_table('head') 1060 head = _set_ushort(head, 50, indexToLocFormat) 1061 output.add('head', head) 1062 1063 return output.makeStream() 1064 1065 1066# 1067# TrueType font embedding 1068# 1069 1070# PDF font flags (see PDF Reference Guide table 5.19) 1071FF_FIXED = 1 << 1-1 1072FF_SERIF = 1 << 2-1 1073FF_SYMBOLIC = 1 << 3-1 1074FF_SCRIPT = 1 << 4-1 1075FF_NONSYMBOLIC = 1 << 6-1 1076FF_ITALIC = 1 << 7-1 1077FF_ALLCAP = 1 << 17-1 1078FF_SMALLCAP = 1 << 18-1 1079FF_FORCEBOLD = 1 << 19-1 1080 1081class TTFontFace(TTFontFile, pdfmetrics.TypeFace): 1082 """TrueType typeface. 1083 1084 Conceptually similar to a single byte typeface, but the glyphs are 1085 identified by UCS character codes instead of glyph names.""" 1086 1087 def __init__(self, filename, validate=0, subfontIndex=0): 1088 "Loads a TrueType font from filename." 1089 pdfmetrics.TypeFace.__init__(self, None) 1090 TTFontFile.__init__(self, filename, validate=validate, subfontIndex=subfontIndex) 1091 1092 def getCharWidth(self, code): 1093 "Returns the width of character U+<code>" 1094 return self.charWidths.get(code, self.defaultWidth) 1095 1096 def addSubsetObjects(self, doc, fontname, subset): 1097 """Generate a TrueType font subset and add it to the PDF document. 1098 Returns a PDFReference to the new FontDescriptor object.""" 1099 1100 fontFile = pdfdoc.PDFStream() 1101 fontFile.content = self.makeSubset(subset) 1102 fontFile.dictionary['Length1'] = len(fontFile.content) 1103 if doc.compression: 1104 fontFile.filters = [pdfdoc.PDFZCompress] 1105 fontFileRef = doc.Reference(fontFile, 'fontFile:%s(%s)' % (self.filename, fontname)) 1106 1107 flags = self.flags & ~ FF_NONSYMBOLIC 1108 flags = flags | FF_SYMBOLIC 1109 1110 fontDescriptor = pdfdoc.PDFDictionary({ 1111 'Type': '/FontDescriptor', 1112 'Ascent': self.ascent, 1113 'CapHeight': self.capHeight, 1114 'Descent': self.descent, 1115 'Flags': flags, 1116 'FontBBox': pdfdoc.PDFArray(self.bbox), 1117 'FontName': pdfdoc.PDFName(fontname), 1118 'ItalicAngle': self.italicAngle, 1119 'StemV': self.stemV, 1120 'FontFile2': fontFileRef, 1121 }) 1122 return doc.Reference(fontDescriptor, 'fontDescriptor:' + fontname) 1123 1124class TTEncoding: 1125 """Encoding for TrueType fonts (always UTF-8). 1126 1127 TTEncoding does not directly participate in PDF object creation, since 1128 we need a number of different 8-bit encodings for every generated font 1129 subset. TTFont itself cares about that.""" 1130 1131 def __init__(self): 1132 self.name = "UTF-8" 1133 1134class TTFont: 1135 """Represents a TrueType font. 1136 1137 Its encoding is always UTF-8. 1138 1139 Note: you cannot use the same TTFont object for different documents 1140 at the same time. 1141 1142 Example of usage: 1143 1144 font = ttfonts.TTFont('PostScriptFontName', '/path/to/font.ttf') 1145 pdfmetrics.registerFont(font) 1146 1147 canvas.setFont('PostScriptFontName', size) 1148 canvas.drawString(x, y, "Some text encoded in UTF-8") 1149 """ 1150 class State: 1151 namePrefix = 'F' 1152 def __init__(self,asciiReadable=None,ttf=None): 1153 A = self.assignments = {} 1154 self.nextCode = 0 1155 self.internalName = None 1156 self.frozen = 0 1157 if getattr(getattr(ttf,'face',None),'_full_font',None): 1158 C = set(self.charToGlyph.keys()) 1159 if 0xa0 in C: C.remove(0xa0) 1160 for n in xrange(256): 1161 if n in C: 1162 A[n] = n 1163 C.remove(n) 1164 for n in C: 1165 A[n] = n 1166 self.subsets = [[n for n in A]] 1167 self.frozen = True 1168 return 1169 1170 if asciiReadable is None: 1171 asciiReadable = rl_config.ttfAsciiReadable 1172 1173 if asciiReadable: 1174 # Let's add the first 128 unicodes to the 0th subset, so ' ' 1175 # always has code 32 (for word spacing to work) and the ASCII 1176 # output is readable 1177 subset0 = list(xrange(128)) 1178 self.subsets = [subset0] 1179 for n in subset0: 1180 A[n] = n 1181 self.nextCode = 128 1182 else: 1183 self.subsets = [[32]*33] 1184 A[32] = 32 1185 1186 _multiByte = 1 # We want our own stringwidth 1187 _dynamicFont = 1 # We want dynamic subsetting 1188 1189 def __init__(self, name, filename, validate=0, subfontIndex=0,asciiReadable=None): 1190 """Loads a TrueType font from filename. 1191 1192 If validate is set to a false values, skips checksum validation. This 1193 can save time, especially if the font is large. 1194 """ 1195 self.fontName = name 1196 self.face = TTFontFace(filename, validate=validate, subfontIndex=subfontIndex) 1197 self.encoding = TTEncoding() 1198 from weakref import WeakKeyDictionary 1199 self.state = WeakKeyDictionary() 1200 if asciiReadable is None: 1201 asciiReadable = rl_config.ttfAsciiReadable 1202 self._asciiReadable = asciiReadable 1203 1204 def stringWidth(self,text,size,encoding='utf8'): 1205 return instanceStringWidthTTF(self,text,size,encoding) 1206 1207 def _assignState(self,doc,asciiReadable=None,namePrefix=None): 1208 '''convenience function for those wishing to roll their own state properties''' 1209 if asciiReadable is None: 1210 asciiReadable = self._asciiReadable 1211 try: 1212 state = self.state[doc] 1213 except KeyError: 1214 state = self.state[doc] = TTFont.State(asciiReadable,self) 1215 if namePrefix is not None: 1216 state.namePrefix = namePrefix 1217 return state 1218 1219 def splitString(self, text, doc, encoding='utf-8'): 1220 """Splits text into a number of chunks, each of which belongs to a 1221 single subset. Returns a list of tuples (subset, string). Use subset 1222 numbers with getSubsetInternalName. Doc is needed for distinguishing 1223 subsets when building different documents at the same time.""" 1224 asciiReadable = self._asciiReadable 1225 try: state = self.state[doc] 1226 except KeyError: state = self.state[doc] = TTFont.State(asciiReadable,self) 1227 curSet = -1 1228 cur = [] 1229 results = [] 1230 if not isUnicode(text): 1231 text = text.decode('utf-8') # encoding defaults to utf-8 1232 assignments = state.assignments 1233 subsets = state.subsets 1234 reserveTTFNotdef = rl_config.reserveTTFNotdef 1235 for code in map(ord,text): 1236 if code==0xa0: code = 32 #map nbsp into space 1237 if code in assignments: 1238 n = assignments[code] 1239 else: 1240 if state.frozen: 1241 raise pdfdoc.PDFError("Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code)) 1242 n = state.nextCode 1243 if n&0xFF==32: 1244 # make code 32 always be a space character 1245 if n!=32: subsets[n >> 8].append(32) 1246 state.nextCode += 1 1247 n = state.nextCode 1248 if n>32: 1249 if not(n&0xFF): 1250 if reserveTTFNotdef: 1251 subsets.append([0]) #force code 0 in as notdef 1252 state.nextCode += 1 1253 n = state.nextCode 1254 else: 1255 subsets.append([]) 1256 subsets[n >> 8].append(code) 1257 else: 1258 subsets[0][n] = code 1259 state.nextCode += 1 1260 assignments[code] = n 1261 #subsets[n>>8].append(code) 1262 if (n >> 8) != curSet: 1263 if cur: 1264 results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur))) 1265 curSet = (n >> 8) 1266 cur = [] 1267 cur.append(n & 0xFF) 1268 if cur: 1269 results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur))) 1270 return results 1271 1272 def getSubsetInternalName(self, subset, doc): 1273 """Returns the name of a PDF Font object corresponding to a given 1274 subset of this dynamic font. Use this function instead of 1275 PDFDocument.getInternalFontName.""" 1276 try: state = self.state[doc] 1277 except KeyError: state = self.state[doc] = TTFont.State(self._asciiReadable) 1278 if subset < 0 or subset >= len(state.subsets): 1279 raise IndexError('Subset %d does not exist in font %s' % (subset, self.fontName)) 1280 if state.internalName is None: 1281 state.internalName = state.namePrefix +repr(len(doc.fontMapping) + 1) 1282 doc.fontMapping[self.fontName] = '/' + state.internalName 1283 doc.delayedFonts.append(self) 1284 return '/%s+%d' % (state.internalName, subset) 1285 1286 def addObjects(self, doc): 1287 """Makes one or more PDF objects to be added to the document. The 1288 caller supplies the internal name to be used (typically F1, F2, ... in 1289 sequence). 1290 1291 This method creates a number of Font and FontDescriptor objects. Every 1292 FontDescriptor is a (no more than) 256 character subset of the original 1293 TrueType font.""" 1294 try: state = self.state[doc] 1295 except KeyError: state = self.state[doc] = TTFont.State(self._asciiReadable) 1296 state.frozen = 1 1297 for n,subset in enumerate(state.subsets): 1298 internalName = self.getSubsetInternalName(n, doc)[1:] 1299 baseFontName = (b''.join((SUBSETN(n),b'+',self.face.name,self.face.subfontNameX))).decode('pdfdoc') 1300 1301 pdfFont = pdfdoc.PDFTrueTypeFont() 1302 pdfFont.__Comment__ = 'Font %s subset %d' % (self.fontName, n) 1303 pdfFont.Name = internalName 1304 pdfFont.BaseFont = baseFontName 1305 1306 pdfFont.FirstChar = 0 1307 pdfFont.LastChar = len(subset) - 1 1308 1309 widths = list(map(self.face.getCharWidth, subset)) 1310 pdfFont.Widths = pdfdoc.PDFArray(widths) 1311 1312 cmapStream = pdfdoc.PDFStream() 1313 cmapStream.content = makeToUnicodeCMap(baseFontName, subset) 1314 if doc.compression: 1315 cmapStream.filters = [pdfdoc.PDFZCompress] 1316 pdfFont.ToUnicode = doc.Reference(cmapStream, 'toUnicodeCMap:' + baseFontName) 1317 1318 pdfFont.FontDescriptor = self.face.addSubsetObjects(doc, baseFontName, subset) 1319 1320 # link it in 1321 ref = doc.Reference(pdfFont, internalName) 1322 fontDict = doc.idToObject['BasicFonts'].dict 1323 fontDict[internalName] = pdfFont 1324 del self.state[doc] 1325 1326#preserve the initial values here 1327def _reset(): 1328 _cached_ttf_dirs.clear() 1329 1330from reportlab.rl_config import register_reset 1331register_reset(_reset) 1332del register_reset 1333