#!/usr/bin/env python
#
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Prints out some stats about a set of fonts, mostly
related to subroutines.

Dependencies:
 - matplotlib
 - fontTools

Usage:
>>> ./subr_grapher.py font1.otf font2.otf font3.otf cff_table.cff

Pass `-g` / `--show-graphs` to also plot the results.

NOTE: if the file extension is `cff`, it will be
interpreted as a raw CFF table.
"""

import argparse
import functools
import io
import itertools
import os

from fontTools.ttLib import TTFont
from fontTools import cffLib
from fontTools.misc import psCharStrings
from fontTools.pens import basePen
import matplotlib.pyplot as plt

# `basestring` only exists on Python 2; fall back to `str` on Python 3 so
# tokenCost() works on both.
try:
    _STRING_TYPES = basestring  # noqa: F821
except NameError:
    _STRING_TYPES = str

SINGLE_BYTE_OPS = {'hstem',
                   'vstem',
                   'vmoveto',
                   'rlineto',
                   'hlineto',
                   'vlineto',
                   'rrcurveto',
                   'callsubr',
                   'return',
                   'endchar',
                   'blend',
                   'hstemhm',
                   'hintmask',
                   'cntrmask',
                   'rmoveto',
                   'hmoveto',
                   'vstemhm',
                   'rcurveline',
                   'rlinecurve',
                   'vvcurveto',
                   'hhcurveto',
                   # 'shortint', # not really an operator
                   'callgsubr',
                   'vhcurveto',
                   'hvcurveto'}


def tokenCost(token):
    """Calculate the bytecode size of a T2 Charstring token.

    Args:
        token: an operator name (string), a ``(maskop, mask)`` tuple whose
            first item is "hintmask" or "cntrmask", an int, or a float.

    Returns:
        The size in bytes: for strings, 1 for operators listed in
        SINGLE_BYTE_OPS, 2 for any other operator, and 1 plus the length of
        the text after the ninth character for strings starting with
        "hintmask"/"cntrmask"; for mask tuples, 1 plus the mask length; for
        ints, 1, 2 or 3 depending on magnitude; for floats, 5.

    Raises:
        AssertionError: if the token is of an unsupported type, or is a
            tuple that is not a hintmask/cntrmask token.
    """
    if isinstance(token, _STRING_TYPES):
        if token[:8] in ("hintmask", "cntrmask"):
            # operator byte + whatever follows the name and one separator
            return 1 + len(token[9:])
        if token in SINGLE_BYTE_OPS:
            return 1
        return 2
    if isinstance(token, tuple):
        # Explicit raise (rather than `assert`) so the check survives `-O`.
        if token[0] not in ("hintmask", "cntrmask"):
            raise AssertionError("unexpected tuple token: %r" % (token,))
        return 1 + len(token[1])
    if isinstance(token, int):
        if -107 <= token <= 107:
            return 1
        if 108 <= token <= 1131 or -1131 <= token <= -108:
            return 2
        return 3
    if isinstance(token, float):
        return 5
    raise AssertionError("unsupported token type: %r" % (type(token),))


def get_cff(filename):
    """Load the CFF font set from `filename`.

    Files with a `.cff` extension are parsed as a raw CFF table; anything
    else is opened as a font and its 'CFF ' table is used.
    """
    if os.path.splitext(filename)[1] == '.cff':
        # CFF is binary data, so read it in binary mode and close the handle
        # promptly. The font set is handed an in-memory stream instead of the
        # file so it can keep reading after the file is closed
        # (NOTE(review): fontTools presumably loads charstrings lazily from
        # the stream it was given -- the in-memory copy is safe either way).
        with open(filename, 'rb') as f:
            data = f.read()
        res = cffLib.CFFFontSet()
        res.decompile(io.BytesIO(data), None)
        return res
    return TTFont(filename)['CFF '].cff


def _local_subrs(fd):
    """Return the local subroutines of font dict `fd`, or [] if it has none."""
    try:
        return fd.Private.Subrs
    except AttributeError:
        return []


def _local_biases(fds):
    """Return the local subroutine bias for each font dict (0 if no Subrs)."""
    return [psCharStrings.calcSubrBias(fd.Private.Subrs)
            if hasattr(fd.Private, 'Subrs') else 0
            for fd in fds]


def _iter_calls(program):
    """Yield `(operand, is_global)` for each subroutine call in `program`.

    The operand is the token immediately preceding a 'callgsubr'
    (is_global=True) or 'callsubr' (is_global=False) token. A missing or
    empty program yields nothing.
    """
    if not program:
        return
    for operand, op in zip(program, program[1:]):
        if op == 'callgsubr':
            yield operand, True
        elif op == 'callsubr':
            yield operand, False


def get_cs_bytes(td, fds):
    """Return the total bytecode length of all subroutines and charstrings.

    Must be called before the charstrings are decompiled, since it reads
    the `bytecode` attribute of every global subr, local subr and glyph.
    """
    count = sum(len(cs.bytecode) for cs in td.GlobalSubrs)
    for fd in fds:
        count += sum(len(cs.bytecode) for cs in _local_subrs(fd))
    count += sum(len(cs.bytecode) for cs in td.CharStrings.values())
    return count


def print_n_subroutines(name, td, fds):
    """Print the number of global subrs and of local subrs per font dict."""
    print("%s:\n\tGlobal Subrs: %d" % (name, len(td.GlobalSubrs)))
    for i, fd in enumerate(fds):
        print("\tFD %d Subrs: %d" % (i, len(_local_subrs(fd))))


def get_savings(td, fds):
    """Compute per-subroutine byte savings and usage counts.

    Requires decompile_charstrings() to have run first, because it reads the
    `subr_cost` / `subr_saving` attributes set there.

    Each subroutine that has a program starts at `-(subr_cost + 2)`; every
    call found (following calls nested inside subroutines) adds
    `subr_saving - tokenCost(operand) - 1` and increments its usage count.

    Returns:
        `((gsavings, lsavings), (gusages, lusages))` where the `g*` items are
        lists indexed by global subr and the `l*` items are lists (one per
        font dict) of lists indexed by local subr.
    """
    # Font dicts without local subrs get empty lists instead of raising.
    local_subrs = [_local_subrs(fd) for fd in fds]

    gsavings = [-(s.subr_cost + 2) if s.program else 0
                for s in td.GlobalSubrs]
    lsavings = [[-(s.subr_cost + 2) if s.program else 0 for s in subrs]
                for subrs in local_subrs]
    gusages = [0 for _ in td.GlobalSubrs]
    lusages = [[0 for _ in subrs] for subrs in local_subrs]
    gbias = psCharStrings.calcSubrBias(td.GlobalSubrs)
    lbias = _local_biases(fds)

    def count_subr(idx, is_global, fdidx=-1):
        # `idx` is the biased operand as it appears in the program.
        if is_global:
            pos = idx + gbias
            subr = td.GlobalSubrs[pos]
            gsavings[pos] += subr.subr_saving - tokenCost(idx) - 1
            gusages[pos] += 1
        else:
            # NOTE(review): glyphs of fonts without an FDArray presumably
            # come with no FD selector, so local calls are unsupported there.
            if fdidx is None or fdidx < 0:
                raise AssertionError("local subr call without an FD index")
            pos = idx + lbias[fdidx]
            subr = local_subrs[fdidx][pos]
            lsavings[fdidx][pos] += subr.subr_saving - tokenCost(idx) - 1
            lusages[fdidx][pos] += 1

        # follow called subrs:
        for operand, nested_global in _iter_calls(subr.program):
            count_subr(operand, nested_global, fdidx)

    for g in td.charset:
        cs, sel = td.CharStrings.getItemAndSelector(g)
        for operand, is_global in _iter_calls(cs.program):
            count_subr(operand, is_global, sel)

    return ((gsavings, lsavings), (gusages, lusages))


def decompile_charstrings(td, fds):
    """Decompile every glyph and annotate subroutines with cost/saving.

    Sets `subr_cost` and `subr_saving` on every global and local subr to its
    bytecode length (recorded before decompiling the glyphs), then reduces
    `subr_saving` by one for subrs whose program ends in 'return'.
    """
    for cs in td.GlobalSubrs:
        cs.subr_cost = cs.subr_saving = len(cs.bytecode)
    for fd in fds:
        for cs in _local_subrs(fd):
            cs.subr_cost = cs.subr_saving = len(cs.bytecode)

    for g in td.charset:
        cs, sel = td.CharStrings.getItemAndSelector(g)
        cs.decompile()

    for cs in td.GlobalSubrs:
        if cs.program and cs.program[-1] == 'return':
            cs.subr_saving -= 1
    for fd in fds:
        for cs in _local_subrs(fd):
            if cs.program and cs.program[-1] == 'return':
                cs.subr_saving -= 1


def get_raw_usages(td, fds):
    """Count call sites per subroutine, without following nested calls.

    Every call found in a glyph, in a global subr or in a local subr is
    counted exactly once.

    Returns:
        `(gusages, lusages)`: a list indexed by global subr, and a list (one
        per font dict) of lists indexed by local subr.
    """
    gusages = [0 for _ in td.GlobalSubrs]
    lusages = [[0 for _ in _local_subrs(fd)] for fd in fds]
    gbias = psCharStrings.calcSubrBias(td.GlobalSubrs)
    lbias = _local_biases(fds)
    # FD selector of the last glyph seen calling each global subr directly.
    gsels = [None for _ in td.GlobalSubrs]

    for g in td.charset:
        cs, sel = td.CharStrings.getItemAndSelector(g)
        for operand, is_global in _iter_calls(cs.program):
            if is_global:
                gusages[operand + gbias] += 1
                gsels[operand + gbias] = sel
            else:
                lusages[sel][operand + lbias[sel]] += 1

    for cs, sel in zip(td.GlobalSubrs, gsels):
        for operand, is_global in _iter_calls(cs.program):
            if is_global:
                gusages[operand + gbias] += 1
            elif sel is not None:
                # Local calls can only be attributed when a glyph told us
                # which font dict this global subr runs under.
                lusages[sel][operand + lbias[sel]] += 1

    for sel, fd in enumerate(fds):
        for cs in _local_subrs(fd):
            for operand, is_global in _iter_calls(cs.program):
                if is_global:
                    gusages[operand + gbias] += 1
                else:
                    lusages[sel][operand + lbias[sel]] += 1

    return (gusages, lusages)


def main(filenames, show_graphs):
    """Print subroutine statistics for each font and optionally plot them.

    Args:
        filenames: paths to fonts (or raw `.cff` tables) to analyse.
        show_graphs: when true, also show matplotlib graphs of the savings
            and usages.
    """
    # Explicit lists/loops instead of map(): on Python 3 map() is lazy, so
    # calls made only for their side effects would never run.
    names = [os.path.basename(f) for f in filenames]
    cffs = [get_cff(f) for f in filenames]
    tds = [cff.topDictIndex[0] for cff in cffs]
    fds = [td.FDArray if hasattr(td, 'FDArray') else [] for td in tds]

    # Byte counts must be gathered before decompiling the charstrings.
    n_bytes = [get_cs_bytes(td, fd) for td, fd in zip(tds, fds)]
    for name, b in zip(names, n_bytes):
        print("%s:\n\t%d bytes" % (name, b))

    for td, fd in zip(tds, fds):
        decompile_charstrings(td, fd)

    for name, td, fd in zip(names, tds, fds):
        print_n_subroutines(name, td, fd)

    sav_usag = [get_savings(td, fd) for td, fd in zip(tds, fds)]
    for name, (savings, usages) in zip(names, sav_usag):
        tot_savings = savings[0] + list(itertools.chain.from_iterable(savings[1]))
        tot_usages = usages[0] + list(itertools.chain.from_iterable(usages[1]))
        if not tot_savings:
            # Nothing to average or take the max of.
            print("%s:\n\tNo subroutines" % name)
            continue
        avg = float(sum(tot_savings)) / len(tot_savings)
        print("%s:\n\tAverage savings per subr: %f\n\tMax saving subr: %d\n\tMax usage subr: %d" % (name, avg, max(tot_savings), max(tot_usages)))

    if show_graphs:
        # plot subrs
        SHOW_START = 0
        SHOW_LEN = 200
        mins = []
        maxes = []
        plt.figure(0)
        for savings, usages in sav_usag:
            tot_savings = savings[0] + list(itertools.chain.from_iterable(savings[1]))
            plot_savings = sorted(tot_savings, reverse=True)[SHOW_START:SHOW_START + SHOW_LEN]
            if not plot_savings:
                continue
            plt.plot(range(len(plot_savings)), plot_savings)
            mins.append(min(plot_savings))
            maxes.append(max(plot_savings))
        if mins:
            plt.ylim([min(mins) - 1, max(maxes) + 1])
        plt.title("Subroutine Savings")
        plt.xlabel("Subroutine")
        plt.ylabel("Savings (bytes)")

        raw_usages = [get_raw_usages(td, fd) for td, fd in zip(tds, fds)]
        fig = 1
        for gusages, lusages in raw_usages:
            labels = ['Global'] + list(range(len(lusages)))
            for idx, usages in zip(labels, [gusages] + lusages):
                if usages:
                    # Rearrange the counts according to the subr bias.
                    bias = psCharStrings.calcSubrBias(usages)
                    if bias == 1131:
                        orig_order_usages = usages[1024:1240] + usages[0:1024] + usages[1240:]
                    elif bias == 32768:
                        orig_order_usages = (usages[32661:32877] + usages[31637:32661] +
                                             usages[32877:33901] + usages[0:31637] +
                                             usages[33901:])
                    else:
                        orig_order_usages = usages
                    plt.figure(fig)
                    plt.plot(range(len(orig_order_usages)), orig_order_usages, color='b')
                    plt.title("Subroutine usages for FD %s" % idx)
                    # axvline's ymin/ymax are axes fractions, so the defaults
                    # already span the full height.
                    # NOTE(review): 215 and 2263 presumably mark where the
                    # call operand grows by a byte (2*107+1, 2*1131+1) --
                    # confirm.
                    plt.axvline(215, color='r')
                    plt.axvline(2263, color='r')
                    plt.ylim([0, max(orig_order_usages)])
                    plt.xlim([0, len(orig_order_usages)])
                    fig += 1
        plt.show()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="""Prints out some stats about a set of fonts,
                       mostly related to subroutines.""")
    parser.add_argument('filenames', help="the path to font files", nargs='+')
    parser.add_argument('-g', '--show-graphs', help="show graphs", action='store_true',
                        default=False)

    kwargs = vars(parser.parse_args())

    main(**kwargs)