1#!/usr/bin/env python
2#
3# Copyright 2015 Google Inc. All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17from fontTools.ttLib import TTFont
18from fontTools import cffLib
19from fontTools.misc import psCharStrings
20from fontTools.pens import basePen
21import matplotlib.pyplot as plt
22import functools
23import itertools
24import os
25import argparse
26
27"""
28Prints out some stats about a set of fonts, mostly
29related to subroutines.
30
31Dependencies:
32  - matplotlib
33  - fontTools
34
Usage (from a shell; pass -g / --show-graphs to also display the plots):
  ./subr_grapher.py [-g] font1.otf font2.otf font3.otf cff_table.cff
37
38NOTE: if the file extension is `cff`, it will be
39interpreted as a raw CFF table.
40"""
41
# Names of the charstring operators that tokenCost prices at one byte.
# Any other operator name is priced at two bytes.
# 'shortint' is intentionally left out: it is not really an operator.
SINGLE_BYTE_OPS = {
    'hstem', 'vstem', 'vmoveto',
    'rlineto', 'hlineto', 'vlineto',
    'rrcurveto', 'callsubr', 'return',
    'endchar', 'blend', 'hstemhm',
    'hintmask', 'cntrmask', 'rmoveto',
    'hmoveto', 'vstemhm', 'rcurveline',
    'rlinecurve', 'vvcurveto', 'hhcurveto',
    'callgsubr', 'vhcurveto', 'hvcurveto',
}
67
try:
    _STRING_TYPES = basestring  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3 has no basestring


def tokenCost(token):
    """Calculate the bytecode size of a T2 Charstring token.

    Args:
        token: one element of a charstring program. Supported forms:

            * a string naming an operator. ``hintmask``/``cntrmask`` may be
              followed by one separator character and the mask data, in
              which case the cost is 1 plus the length of that data;
              operators listed in ``SINGLE_BYTE_OPS`` cost 1; every other
              operator name costs 2.
            * a ``(operator, mask)`` tuple whose operator is ``hintmask`` or
              ``cntrmask``; the cost is 1 plus ``len(mask)``.
            * an ``int`` operand: 1 byte in [-107, 107], 2 bytes in
              [-1131, -108] or [108, 1131], otherwise 3 bytes.
            * a ``float`` operand: always 5 bytes.

    Returns:
        The size in bytes, as an int.

    Raises:
        ValueError: if a tuple token does not start with a mask operator.
        TypeError: if the token is of an unsupported type.
    """
    if isinstance(token, _STRING_TYPES):
        if token[:8] in ("hintmask", "cntrmask"):
            # Skip the 8-character operator name plus one separator.
            return 1 + len(token[9:])
        if token in SINGLE_BYTE_OPS:
            return 1
        return 2

    if isinstance(token, tuple):
        if token[0] not in ("hintmask", "cntrmask"):
            raise ValueError("unexpected tuple token: %r" % (token,))
        return 1 + len(token[1])

    if isinstance(token, int):
        if -107 <= token <= 107:
            return 1
        if 108 <= token <= 1131 or -1131 <= token <= -108:
            return 2
        return 3

    if isinstance(token, float):
        return 5

    # Raise explicitly: a bare ``assert`` would vanish under ``python -O``
    # and let the function fall through and return None.
    raise TypeError("unsupported charstring token: %r" % (token,))
92
def get_cff(filename):
    """Load the CFF font set stored in *filename*.

    Args:
        filename: path to a font. A ``.cff`` extension (any letter case)
            means the file is a raw CFF table; anything else is opened as a
            font with ``TTFont`` and its ``'CFF '`` table is used.

    Returns:
        The ``cffLib.CFFFontSet`` for the file.
    """
    if os.path.splitext(filename)[1].lower() == '.cff':
        from io import BytesIO

        # A CFF table is binary data, so read it in binary mode and close
        # the handle promptly. The parser is handed an in-memory stream
        # because it may keep reading from its input after decompile()
        # returns (cffLib loads index items lazily).
        with open(filename, 'rb') as cff_file:
            data = cff_file.read()
        res = cffLib.CFFFontSet()
        res.decompile(BytesIO(data), None)
        return res
    return TTFont(filename)['CFF '].cff
100
def get_cs_bytes(td, fds):
    """Return the total bytecode length of all charstrings in a font.

    Sums ``len(cs.bytecode)`` over the global subroutines of *td*, the
    local subroutines of every font dict in *fds*, and the glyph
    charstrings of *td*.
    """
    total = sum(len(subr.bytecode) for subr in td.GlobalSubrs)

    for font_dict in fds:
        try:
            for subr in font_dict.Private.Subrs:
                total += len(subr.bytecode)
        except AttributeError:
            # Font dicts without local subroutines contribute nothing.
            pass

    total += sum(len(glyph.bytecode) for glyph in td.CharStrings.values())
    return total
114
def print_n_subroutines(name, td, fds):
    """Print how many global and per-font-dict local subroutines a font has.

    *name* labels the report. A font dict whose ``Private.Subrs`` lookup
    raises AttributeError is reported as having 0 subroutines.
    """
    global_count = len(td.GlobalSubrs)
    print("%s:\n\tGlobal Subrs: %d" % (name, global_count))

    for fd_index, font_dict in enumerate(fds):
        try:
            local_count = len(font_dict.Private.Subrs)
        except AttributeError:
            local_count = 0
        print("\tFD %d Subrs: %d" % (fd_index, local_count))
123
def get_savings(td, fds):
    """Compute the byte savings and call counts of every subroutine.

    Each subroutine with a non-empty program starts at ``-(subr_cost + 2)``;
    subroutines with an empty program start at 0. Every call then adds
    ``subr_saving - tokenCost(index operand) - 1``; the final ``- 1`` is
    presumably the one-byte call operator. Calls are followed recursively,
    so a call nested inside a subroutine is counted once per invocation of
    its caller.

    The ``subr_cost`` and ``subr_saving`` attributes must already be set on
    every subroutine and the programs must be decompiled (see
    ``decompile_charstrings``).

    Args:
        td: the font's top dict.
        fds: the font dicts of the font (may be empty).

    Returns:
        ``((gsavings, lsavings), (gusages, lusages))`` where the ``g*``
        lists have one entry per global subroutine and the ``l*`` lists
        hold one list per font dict.

    Raises:
        ValueError: if a local subroutine call has no usable font dict
            index (e.g. the glyph's selector is None).
    """
    # None marks a font dict that has no local subroutines at all. Using
    # one guarded lookup for savings, usages and biases keeps them in
    # agreement.
    subrs_per_fd = [getattr(fd.Private, 'Subrs', None) for fd in fds]

    def initial_saving(subr):
        return -(subr.subr_cost + 2) if subr.program else 0

    gsavings = [initial_saving(s) for s in td.GlobalSubrs]
    lsavings = [[initial_saving(s) for s in subrs] if subrs is not None else []
                for subrs in subrs_per_fd]
    gusages = [0] * len(td.GlobalSubrs)
    lusages = [[0] * len(subrs) if subrs is not None else []
               for subrs in subrs_per_fd]
    gbias = psCharStrings.calcSubrBias(td.GlobalSubrs)
    # A real list, not map(): the biases are indexed below, which a lazy
    # Python 3 map object does not support.
    lbias = [psCharStrings.calcSubrBias(subrs) if subrs is not None else 0
             for subrs in subrs_per_fd]

    def count_calls(program, fdidx):
        # A call operator takes the token just before it as its operand.
        for operand, op in zip(program, program[1:]):
            if op == 'callgsubr':
                count_subr(operand, True, fdidx)
            elif op == 'callsubr':
                count_subr(operand, False, fdidx)

    def count_subr(idx, is_global, fdidx=-1):
        if is_global:
            pos = idx + gbias
            subr = td.GlobalSubrs[pos]
            gsavings[pos] += subr.subr_saving - tokenCost(idx) - 1
            gusages[pos] += 1
        else:
            if fdidx is None or fdidx < 0:
                raise ValueError(
                    "local subroutine call without a font dict index")
            pos = idx + lbias[fdidx]
            subr = subrs_per_fd[fdidx][pos]
            lsavings[fdidx][pos] += subr.subr_saving - tokenCost(idx) - 1
            lusages[fdidx][pos] += 1

        # follow called subrs:
        count_calls(subr.program, fdidx)

    for g in td.charset:
        cs, sel = td.CharStrings.getItemAndSelector(g)
        count_calls(cs.program, sel)

    return ((gsavings, lsavings), (gusages, lusages))
161
def decompile_charstrings(td, fds):
    """Decompile every glyph and annotate subroutines with size figures.

    Before decompiling, each global and local subroutine gets ``subr_cost``
    and ``subr_saving`` set to its bytecode length. After every glyph in
    ``td.charset`` has been decompiled, ``subr_saving`` is reduced by one
    for each subroutine whose program ends with ``'return'``.
    """
    def record_size(subr):
        size = len(subr.bytecode)
        subr.subr_cost = size
        subr.subr_saving = size

    def discount_return(subr):
        if subr.program and subr.program[-1] == 'return':
            subr.subr_saving -= 1

    def for_each_subr(action):
        for subr in td.GlobalSubrs:
            action(subr)
        for font_dict in fds:
            try:
                for subr in font_dict.Private.Subrs:
                    action(subr)
            except AttributeError:
                # Font dicts without local subroutines are skipped.
                pass

    # Sizes are recorded first, while the bytecode is still untouched.
    for_each_subr(record_size)

    for glyph_name in td.charset:
        charstring, _ = td.CharStrings.getItemAndSelector(glyph_name)
        charstring.decompile()

    for_each_subr(discount_return)
184
def get_raw_usages(td, fds):
    """Count how often each subroutine is referenced by a call operator.

    Unlike a recursive walk, every program (each glyph, each global
    subroutine and each local subroutine) is scanned exactly once, so the
    result is the number of call sites rather than the number of
    invocations.

    Local calls made from a global subroutine are attributed to the font
    dict of the last glyph that called that global subroutine directly.
    When no font dict index is known (the selector is None), local calls
    are skipped because there is no per-font-dict list to record them in.

    Args:
        td: the font's top dict, with decompiled programs.
        fds: the font dicts of the font (may be empty).

    Returns:
        ``(gusages, lusages)``: one count per global subroutine, and one
        list of counts per font dict.
    """
    # None marks a font dict that has no local subroutines at all.
    subrs_per_fd = [getattr(fd.Private, 'Subrs', None) for fd in fds]

    gusages = [0] * len(td.GlobalSubrs)
    lusages = [[0] * len(subrs) if subrs is not None else []
               for subrs in subrs_per_fd]
    gbias = psCharStrings.calcSubrBias(td.GlobalSubrs)
    # A real list, not map(): the biases are indexed below, which a lazy
    # Python 3 map object does not support.
    lbias = [psCharStrings.calcSubrBias(subrs) if subrs is not None else 0
             for subrs in subrs_per_fd]
    gsels = [None] * len(td.GlobalSubrs)

    def tally(program, sel, remember_sel=False):
        # A call operator takes the token just before it as its operand.
        for operand, op in zip(program, program[1:]):
            if op == 'callgsubr':
                gusages[operand + gbias] += 1
                if remember_sel:
                    gsels[operand + gbias] = sel
            elif op == 'callsubr' and sel is not None:
                lusages[sel][operand + lbias[sel]] += 1

    for g in td.charset:
        cs, sel = td.CharStrings.getItemAndSelector(g)
        tally(cs.program, sel, remember_sel=True)

    for cs, sel in zip(td.GlobalSubrs, gsels):
        tally(cs.program, sel)

    for sel, subrs in enumerate(subrs_per_fd):
        if subrs is not None:
            for cs in subrs:
                tally(cs.program, sel)

    return (gusages, lusages)
220
def main(filenames, show_graphs):
    """Print subroutine statistics for each font and optionally plot them.

    Args:
        filenames: paths of the fonts (or raw ``.cff`` tables) to analyse.
        show_graphs: when true, also plot the per-subroutine savings and
            the raw usage counts with matplotlib.
    """
    # Real lists and explicit loops instead of map(): on Python 3 map() is
    # lazy, so its result cannot be reused and calls made only for their
    # side effects would never run.
    names = [os.path.basename(filename) for filename in filenames]
    cffs = [get_cff(filename) for filename in filenames]
    tds = [cff.topDictIndex[0] for cff in cffs]
    fds = [td.FDArray if hasattr(td, 'FDArray') else [] for td in tds]

    # Byte counts are taken before decompile_charstrings runs, as in the
    # original ordering.
    n_bytes = [get_cs_bytes(td, fd) for td, fd in zip(tds, fds)]
    for name, b in zip(names, n_bytes):
        print("%s:\n\t%d bytes" % (name, b))

    for td, fd in zip(tds, fds):
        decompile_charstrings(td, fd)

    for name, td, fd in zip(names, tds, fds):
        print_n_subroutines(name, td, fd)

    sav_usag = [get_savings(td, fd) for td, fd in zip(tds, fds)]
    for name, (savings, usages) in zip(names, sav_usag):
        tot_savings = savings[0] + list(itertools.chain.from_iterable(savings[1]))
        tot_usages = usages[0] + list(itertools.chain.from_iterable(usages[1]))
        if not tot_savings:
            # No subroutines at all: there is nothing to average or rank.
            print("%s:\n\tNo subroutines" % name)
            continue
        avg = float(sum(tot_savings)) / len(tot_savings)
        print("%s:\n\tAverage savings per subr: %f\n\tMax saving subr: %d\n\tMax usage subr: %d" % (name, avg, max(tot_savings), max(tot_usages)))

    if show_graphs:
        # plot subrs
        SHOW_START = 0
        SHOW_LEN = 200
        mins = []
        maxes = []
        plt.figure(0)
        for savings, _ in sav_usag:
            tot_savings = savings[0] + list(itertools.chain.from_iterable(savings[1]))
            plot_savings = sorted(tot_savings, reverse=True)[SHOW_START:SHOW_START + SHOW_LEN]
            if not plot_savings:
                continue  # font without subroutines: nothing to plot
            plt.plot(range(len(plot_savings)), plot_savings)
            mins.append(min(plot_savings))
            maxes.append(max(plot_savings))
        if mins:
            plt.ylim([min(mins) - 1, max(maxes) + 1])
        plt.title("Subroutine Savings")
        plt.xlabel("Subroutine")
        plt.ylabel("Savings (bytes)")

        raw_usages = [get_raw_usages(td, fd) for td, fd in zip(tds, fds)]
        fig_num = 1
        for gusages, lusages in raw_usages:
            labels = ['Global'] + list(range(len(lusages)))
            for idx, usages in zip(labels, [gusages] + lusages):
                if not usages:
                    continue
                # Reorder the counts according to the subroutine bias.
                # NOTE(review): presumably this puts the subroutines with
                # the cheapest index operands first -- confirm.
                bias = psCharStrings.calcSubrBias(usages)
                if bias == 1131:
                    orig_order_usages = usages[1024:1240] + usages[0:1024] + usages[1240:]
                elif bias == 32768:
                    orig_order_usages = (usages[32661:32877] + usages[31637:32661] +
                                         usages[32877:33901] + usages[0:31637] +
                                         usages[33901:])
                else:
                    orig_order_usages = usages
                peak = max(orig_order_usages)
                plt.figure(fig_num)
                plt.plot(range(len(orig_order_usages)), orig_order_usages, color='b')
                plt.title("Subroutine usages for FD %s" % idx)
                # 215 and 2263 are the number of integers in [-107, 107]
                # and [-1131, 1131], the operand ranges tokenCost prices at
                # one and at most two bytes.
                # NOTE(review): the 2nd/3rd axvline arguments are ymin/ymax;
                # confirm that passing the peak count here is intended.
                plt.axvline(215, 0, peak, color='r')
                plt.axvline(2263, 0, peak, color='r')
                plt.ylim([0, peak])
                plt.xlim([0, len(orig_order_usages)])
                fig_num += 1
        plt.show()
283
if __name__ == '__main__':
    # Command-line entry point: collect the font paths and the optional
    # graph flag, then hand them to main().
    # The description now matches this script; the previous text described
    # a different tool (Compreffor).
    parser = argparse.ArgumentParser(
                        description="""Prints out some stats about a set of
                                       fonts, mostly related to
                                       subroutines.""")
    parser.add_argument('filenames', help="the path to font files", nargs='+')
    parser.add_argument('-g', '--show-graphs', help="show graphs", action='store_true',
                        default=False)

    # The argument destinations (filenames, show_graphs) match main()'s
    # parameter names, so the parsed namespace can be passed as keywords.
    kwargs = vars(parser.parse_args())

    main(**kwargs)
298