1#------------------------------------------------------------------------------
2# File:         Font.pm
3#
4# Description:  Read meta information from font files
5#
6# Revisions:    2010/01/15 - P. Harvey Created
7#
8# References:   1) http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html
9#               2) http://www.microsoft.com/typography/otspec/otff.htm
10#               3) http://partners.adobe.com/public/developer/opentype/index_font_file.html
11#               4) http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf
12#               5) http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java
13#               6) http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf
14#------------------------------------------------------------------------------
15
16package Image::ExifTool::Font;
17
18use strict;
19use vars qw($VERSION %ttLang);
20use Image::ExifTool qw(:DataAccess :Utils);
21
22$VERSION = '1.10';
23
24sub ProcessOTF($$);
25
# TrueType 'name' table platform IDs => platform names.  The names double as
# the first-level keys of the %ttCharset and %ttLang lookups below.
my %ttPlatform;
@ttPlatform{0 .. 4} = qw(Unicode Macintosh ISO Windows Custom);
34
# convert TrueType 'name' character encoding to ExifTool Charset (ref 1/2)
# - first key:  platform name (a value of %ttPlatform)
# - second key: encoding ID taken from the name record
# - a defined-but-empty charset means "leave the string as-is": ProcessOTF
#   skips decoding and only warns when the lookup is undefined
my %ttCharset = (
    Macintosh => {
        0  => 'MacRoman',
        1  => 'MacJapanese',
        2  => 'MacChineseTW',
        3  => 'MacKorean',
        4  => 'MacArabic',
        5  => 'MacHebrew',
        6  => 'MacGreek',
        7  => 'MacCyrillic',    # 7=Russian
        8  => 'MacRSymbol',
        9  => 'MacDevanagari',
        10 => 'MacGurmukhi',
        11 => 'MacGujarati',
        12 => 'MacOriya',
        13 => 'MacBengali',
        14 => 'MacTamil',
        15 => 'MacTelugu',
        16 => 'MacKannada',
        17 => 'MacMalayalam',
        18 => 'MacSinhalese',
        19 => 'MacBurmese',
        20 => 'MacKhmer',
        21 => 'MacThai',
        22 => 'MacLaotian',
        23 => 'MacGeorgian',
        24 => 'MacArmenian',
        25 => 'MacChineseCN',
        26 => 'MacTibetan',
        27 => 'MacMongolian',
        28 => 'MacGeez',
        29 => 'MacCyrillic',    # 29=Slavic
        30 => 'MacVietnam',
        31 => 'MacSindhi',
        32 => '',               # 32=uninterpreted
    },
    Windows => {
        0  => 'Symbol',
        1  => 'UCS2',
        2  => 'ShiftJIS',
        3  => 'PRC',
        4  => 'Big5',
        5  => 'Wansung',
        6  => 'Johab',
        10 => 'UCS4',
    },
    Unicode => {
        # (we don't currently handle the various Unicode flavours)
        0 => 'UCS2',    # Unicode 1.0 semantics
        1 => 'UCS2',    # Unicode 1.1 semantics
        2 => 'UCS2',    # ISO 10646 semantics
        3 => 'UCS2',    # Unicode 2.0 and onwards semantics, Unicode BMP only.
        4 => 'UCS2',    # Unicode 2.0 and onwards semantics, Unicode full repertoire.
        # 5 => Unicode Variation Sequences (not used in Naming table)
    },
    ISO => {            # (deprecated)
        0 => 'UTF8',    # (7-bit ASCII)
        1 => 'UCS2',    # ISO 10646
        2 => 'Latin',   # ISO 8859-1
    },
    Custom => { },
);
78
# convert TrueType 'name' language code to ExifTool language code
# - first key is a platform name (a value of %ttPlatform), second key is the
#   numeric language ID read from the name record
# - this is a package variable (declared with "use vars" above), so it is
#   visible to other modules
# - ProcessOTF falls back to the language-tag records of a format 1 'name'
#   table when an ID is not found here (the Unicode, ISO and Custom
#   sub-tables are deliberately empty)
%ttLang = (
  # Macintosh language codes (also used by QuickTime.pm)
  # oddities (IDs whose variant is not expressed in the language code used):
  #   49 - Cyrillic version    83 - Roman
  #   50 - Arabic version     84 - Arabic
  #  146 - with dot above
  # (49 and 50 both map to 'az', 83/84 to 'ms-MY'/'ms-BN', 146 to 'ga')
  Macintosh => {
    0 => 'en',     24 => 'lt',    48 => 'kk',    72 => 'ml',    129 => 'eu',
    1 => 'fr',     25 => 'pl',    49 => 'az',    73 => 'kn',    130 => 'ca',
    2 => 'de',     26 => 'hu',    50 => 'az',    74 => 'ta',    131 => 'la',
    3 => 'it',     27 => 'et',    51 => 'hy',    75 => 'te',    132 => 'qu',
    4 => 'nl-NL',  28 => 'lv',    52 => 'ka',    76 => 'si',    133 => 'gn',
    5 => 'sv',     29 => 'smi',   53 => 'ro',    77 => 'my',    134 => 'ay',
    6 => 'es',     30 => 'fo',    54 => 'ky',    78 => 'km',    135 => 'tt',
    7 => 'da',     31 => 'fa',    55 => 'tg',    79 => 'lo',    136 => 'ug',
    8 => 'pt',     32 => 'ru',    56 => 'tk',    80 => 'vi',    137 => 'dz',
    9 => 'no',     33 => 'zh-CN', 57 => 'mn-MN', 81 => 'id',    138 => 'jv',
    10 => 'he',    34 => 'nl-BE', 58 => 'mn-CN', 82 => 'tl',    139 => 'su',
    11 => 'ja',    35 => 'ga',    59 => 'ps',    83 => 'ms-MY', 140 => 'gl',
    12 => 'ar',    36 => 'sq',    60 => 'ku',    84 => 'ms-BN', 141 => 'af',
    13 => 'fi',    37 => 'ro',    61 => 'ks',    85 => 'am',    142 => 'br',
    14 => 'el',    38 => 'cs',    62 => 'sd',    86 => 'ti',    144 => 'gd',
    15 => 'is',    39 => 'sk',    63 => 'bo',    87 => 'om',    145 => 'gv',
    16 => 'mt',    40 => 'sl',    64 => 'ne',    88 => 'so',    146 => 'ga',
    17 => 'tr',    41 => 'yi',    65 => 'sa',    89 => 'sw',    147 => 'to',
    18 => 'hr',    42 => 'sr',    66 => 'mr',    90 => 'rw',    148 => 'el',
    19 => 'zh-TW', 43 => 'mk',    67 => 'bn',    91 => 'rn',    149 => 'kl',
    20 => 'ur',    44 => 'bg',    68 => 'as',    92 => 'ny',    150 => 'az',
    21 => 'hi',    45 => 'uk',    69 => 'gu',    93 => 'mg',
    22 => 'th',    46 => 'be',    70 => 'pa',    94 => 'eo',
    23 => 'ko',    47 => 'uz',    71 => 'or',   128 => 'cy',
  },
  # Windows language codes (http://msdn.microsoft.com/en-us/library/0h88fahh(VS.85).aspx)
  # Notes: This isn't an exact science.  The reference above gives language codes
  # which are different from some ISO 639-1 numbers.  Also, some Windows language
  # codes don't appear to have ISO 639-1 equivalents.
  #  0x0428 - fa by ref above
  #  0x048c - no ISO equivalent
  #  0x081a/0x83c - sr-SP
  #  0x0c0a - modern?
  #  0x2409 - Caribbean country code not found in ISO 3166-1
  # (the table reads down each column: IDs increase top-to-bottom, then
  # continue at the top of the next column)
  Windows => {
    0x0401 => 'ar-SA', 0x0438 => 'fo',    0x0481 => 'mi',    0x1409 => 'en-NZ',
    0x0402 => 'bg',    0x0439 => 'hi',    0x0482 => 'oc',    0x140a => 'es-CR',
    0x0403 => 'ca',    0x043a => 'mt',    0x0483 => 'co',    0x140c => 'fr-LU',
    0x0404 => 'zh-TW', 0x043b => 'se-NO', 0x0484 => 'gsw',   0x141a => 'bs-BA',
    0x0405 => 'cs',    0x043c => 'gd',    0x0485 => 'sah',   0x143b => 'smj-SE',
    0x0406 => 'da',    0x043d => 'yi',    0x0486 => 'ny',    0x1801 => 'ar-MA',
    0x0407 => 'de-DE', 0x043e => 'ms-MY', 0x0487 => 'rw',    0x1809 => 'en-IE',
    0x0408 => 'el',    0x043f => 'kk',    0x048c => 'Dari',  0x180a => 'es-PA',
    0x0409 => 'en-US', 0x0440 => 'ky',    0x0801 => 'ar-IQ', 0x180c => 'fr-MC',
    0x040a => 'es-ES', 0x0441 => 'sw',    0x0804 => 'zh-CN', 0x181a => 'sr-BA',
    0x040b => 'fi',    0x0442 => 'tk',    0x0807 => 'de-CH', 0x183b => 'sma-NO',
    0x040c => 'fr-FR', 0x0443 => 'uz-UZ', 0x0809 => 'en-GB', 0x1c01 => 'ar-TN',
    0x040d => 'he',    0x0444 => 'tt',    0x080a => 'es-MX', 0x1c09 => 'en-ZA',
    0x040e => 'hu',    0x0445 => 'bn-IN', 0x080c => 'fr-BE', 0x1c0a => 'es-DO',
    0x040f => 'is',    0x0446 => 'pa',    0x0810 => 'it-CH', 0x1c1a => 'sr-BA',
    0x0410 => 'it-IT', 0x0447 => 'gu',    0x0813 => 'nl-BE', 0x1c3b => 'sma-SE',
    0x0411 => 'ja',    0x0448 => 'wo',    0x0814 => 'nn',    0x2001 => 'ar-OM',
    0x0412 => 'ko',    0x0449 => 'ta',    0x0816 => 'pt-PT', 0x2009 => 'en-JM',
    0x0413 => 'nl-NL', 0x044a => 'te',    0x0818 => 'ro-MO', 0x200a => 'es-VE',
    0x0414 => 'no-NO', 0x044b => 'kn',    0x0819 => 'ru-MO', 0x201a => 'bs-BA',
    0x0415 => 'pl',    0x044c => 'ml',    0x081a => 'sr-RS', 0x203b => 'sms',
    0x0416 => 'pt-BR', 0x044d => 'as',    0x081d => 'sv-FI', 0x2401 => 'ar-YE',
    0x0417 => 'rm',    0x044e => 'mr',    0x082c => 'az-AZ', 0x2409 => 'en-CB',
    0x0418 => 'ro',    0x044f => 'sa',    0x082e => 'dsb',   0x240a => 'es-CO',
    0x0419 => 'ru',    0x0450 => 'mn-MN', 0x083b => 'se-SE', 0x243b => 'smn',
    0x041a => 'hr',    0x0451 => 'bo',    0x083c => 'ga',    0x2801 => 'ar-SY',
    0x041b => 'sk',    0x0452 => 'cy',    0x083e => 'ms-BN', 0x2809 => 'en-BZ',
    0x041c => 'sq',    0x0453 => 'km',    0x0843 => 'uz-UZ', 0x280a => 'es-PE',
    0x041d => 'sv-SE', 0x0454 => 'lo',    0x0845 => 'bn-BD', 0x2c01 => 'ar-JO',
    0x041e => 'th',    0x0456 => 'gl',    0x0850 => 'mn-CN', 0x2c09 => 'en-TT',
    0x041f => 'tr',    0x0457 => 'kok',   0x085d => 'iu-CA', 0x2c0a => 'es-AR',
    0x0420 => 'ur',    0x045a => 'syr',   0x085f => 'tmh',   0x3001 => 'ar-LB',
    0x0421 => 'id',    0x045b => 'si',    0x086b => 'qu-EC', 0x3009 => 'en-ZW',
    0x0422 => 'uk',    0x045d => 'iu-CA', 0x0c01 => 'ar-EG', 0x300a => 'es-EC',
    0x0423 => 'be',    0x045e => 'am',    0x0c04 => 'zh-HK', 0x3401 => 'ar-KW',
    0x0424 => 'sl',    0x0461 => 'ne',    0x0c07 => 'de-AT', 0x3409 => 'en-PH',
    0x0425 => 'et',    0x0462 => 'fy',    0x0c09 => 'en-AU', 0x340a => 'es-CL',
    0x0426 => 'lv',    0x0463 => 'ps',    0x0c0a => 'es-ES', 0x3801 => 'ar-AE',
    0x0427 => 'lt',    0x0464 => 'fil',   0x0c0c => 'fr-CA', 0x380a => 'es-UY',
    0x0428 => 'tg',    0x0465 => 'dv',    0x0c1a => 'sr-RS', 0x3c01 => 'ar-BH',
    0x042a => 'vi',    0x0468 => 'ha',    0x0c3b => 'se-FI', 0x3c0a => 'es-PY',
    0x042b => 'hy',    0x046a => 'yo',    0x0c6b => 'qu-PE', 0x4001 => 'ar-QA',
    0x042c => 'az-AZ', 0x046b => 'qu-BO', 0x1001 => 'ar-LY', 0x4009 => 'en-IN',
    0x042d => 'eu',    0x046c => 'st',    0x1004 => 'zh-SG', 0x400a => 'es-BO',
    0x042e => 'hsb',   0x046d => 'ba',    0x1007 => 'de-LU', 0x4409 => 'en-MY',
    0x042f => 'mk',    0x046e => 'lb',    0x1009 => 'en-CA', 0x440a => 'es-SV',
    0x0430 => 'st',    0x046f => 'kl',    0x100a => 'es-GT', 0x4809 => 'en-SG',
    0x0431 => 'ts',    0x0470 => 'ig',    0x100c => 'fr-CH', 0x480a => 'es-HN',
    0x0432 => 'tn',    0x0478 => 'yi',    0x101a => 'hr-BA', 0x4c0a => 'es-NI',
    0x0434 => 'xh',    0x047a => 'arn',   0x103b => 'smj-NO',0x500a => 'es-PR',
    0x0435 => 'zu',    0x047c => 'moh',   0x1401 => 'ar-DZ', 0x540a => 'es-US',
    0x0436 => 'af',    0x047e => 'br',    0x1404 => 'zh-MO',
    0x0437 => 'ka',    0x0480 => 'ug',    0x1407 => 'de-LI',
  },
  # no built-in language tables for these platforms
  Unicode => { },
  ISO     => { },
  Custom  => { },
);
180
# eclectic table of tags for various format font files
# - the lower-case keys 'numfonts', 'fontname' and 'postfont' and the 'PFM'
#   key are the tag IDs passed to HandleTag by ProcessTTC/ProcessFont below
# - 'name', 'PSInfo' and 'AFM' are not referenced by the routines in this file
#   (NOTE(review): presumably used by other modules -- confirm before renaming)
%Image::ExifTool::Font::Main = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        This table contains a collection of tags found in font files of various
        formats.  ExifTool currently recognizes OTF, TTF, TTC, DFONT, PFA, PFB, PFM,
        AFM, ACFM and AMFM font files.
    },
    # TrueType/OpenType 'name' table
    name => {
        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
    },
    # 117-byte PFM file header (decoded as binary data by the PFM table)
    PFM  => {
        Name => 'PFMHeader',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PFM' },
    },
    # PostScript FontInfo dictionary (PFA/PFB)
    PSInfo => {
        Name => 'PSFontInfo',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PSInfo' },
    },
    # Adobe Font Metrics
    AFM => {
        Name => 'AFM',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::AFM' },
    },
    numfonts => 'NumFonts',     # number of fonts in a TTC collection
    fontname => 'FontName',     # first name string of a PFM file
    postfont => {               # second name string of a PFM file
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
);
211
# TrueType name tags (ref 1/2)
# (keys are the numeric name IDs read from each 'name' record by ProcessOTF;
# ID 15 has no entry in this table)
%Image::ExifTool::Font::Name = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        The following tags are extracted from the TrueType font "name" table found
        in OTF, TTF, TTC and DFONT files.  These tags support localized languages by
        adding a hyphen followed by a language code to the end of the tag name (eg.
        "Copyright-fr" or "License-en-US").  Tags with no language code use the
        default language of "en".
    },
    0  => {
        Name   => 'Copyright',
        Groups => { 2 => 'Author' },
    },
    1  => 'FontFamily',
    2  => 'FontSubfamily',
    3  => 'FontSubfamilyID',
    4  => 'FontName',               # full name
    5  => 'NameTableVersion',
    6  => {
        Name        => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
    7  => 'Trademark',
    8  => 'Manufacturer',
    9  => 'Designer',
    10 => 'Description',
    11 => 'VendorURL',
    12 => 'DesignerURL',
    13 => 'License',
    14 => 'LicenseInfoURL',
    16 => 'PreferredFamily',
    17 => 'PreferredSubfamily',
    18 => 'CompatibleFontName',
    19 => 'SampleText',
    # (shares its tag name and description with ID 6)
    20 => { Name => 'PostScriptFontName', Description => 'PostScript Font Name' },
    21 => 'WWSFamilyName',
    22 => 'WWSSubfamilyName',
);
248
# PostScript Font Metric file header (ref 4)
# (keys are byte offsets into the 117-byte header that ProcessFont hands to
# HandleTag; entries without a Format use the binary-data table default)
%Image::ExifTool::Font::PFM = (
    GROUPS => { 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    NOTES => 'Tags extracted from the PFM file header.',
    0 => {
        Name => 'PFMVersion',
        Format => 'int16u',
        # printed as hex "major.minor" (high byte, then 2-digit low byte)
        PrintConv => 'sprintf("%x.%.2x",$val>>8,$val&0xff)',
    },
    6 => {
        Name => 'Copyright',
        Format => 'string[60]',
        Groups => { 2 => 'Author' },
    },
    66 => { Name => 'FontType', Format => 'int16u' },
    68 => { Name => 'PointSize', Format => 'int16u' },
    70 => { Name => 'YResolution', Format => 'int16u' },
    72 => { Name => 'XResolution', Format => 'int16u' },
    74 => { Name => 'Ascent', Format => 'int16u' },
    76 => { Name => 'InternalLeading', Format => 'int16u' },
    78 => { Name => 'ExternalLeading', Format => 'int16u' },
    80 => { Name => 'Italic' },
    81 => { Name => 'Underline' },
    82 => { Name => 'Strikeout' },
    83 => { Name => 'Weight', Format => 'int16u' },
    85 => { Name => 'CharacterSet' },
    86 => { Name => 'PixWidth', Format => 'int16u' },
    88 => { Name => 'PixHeight', Format => 'int16u' },
    90 => { Name => 'PitchAndFamily' },
    91 => { Name => 'AvgWidth', Format => 'int16u' },
    93 => { Name => 'MaxWidth', Format => 'int16u' },
    95 => { Name => 'FirstChar' },
    96 => { Name => 'LastChar' },
    97 => { Name => 'DefaultChar' },
    98 => { Name => 'BreakChar' },
    99 => { Name => 'WidthBytes', Format => 'int16u' },
    # (the remaining header fields are pointers and are not extracted as tags;
    #  ProcessFont reads the values at offsets 101 and 105 directly)
    # 101 => { Name => 'DeviceTypeOffset', Format => 'int32u' },
    # 105 => { Name => 'FontNameOffset',   Format => 'int32u' },
    # 109 => { Name => 'BitsPointer',      Format => 'int32u' },
    # 113 => { Name => 'BitsOffset',       Format => 'int32u' },
);
287
# PostScript FontInfo attributes (PFA, PFB) (ref PH)
%Image::ExifTool::Font::PSInfo = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from PostScript font files (PFA and PFB).',
    # attributes with an empty definition (each gets its own hash)
    (map { ($_ => { }) } qw(
        FullName Weight ItalicAngle isFixedPitch
        UnderlinePosition UnderlineThickness
        version FontName FontType FSType
    )),
    # attributes with a non-default tag name or group
    FamilyName => { Name => 'FontFamily' },
    Copyright  => { Groups => { 2 => 'Author' } },
    Notice     => { Groups => { 2 => 'Author' } },
);
306
# Adobe Font Metrics tags (AFM) (ref 6)
# (keys are the keywords found at the start of each header line; ProcessAFM
# turns a "Comment Creation Date: ..." line into the 'Creation Date' key)
%Image::ExifTool::Font::AFM = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from Adobe Font Metrics files (AFM, ACFM and AMFM).',
    'Creation Date' => {
        Name => 'CreateDate',
        Groups => { 2 => 'Time' },
    },
    FamilyName => { Name => 'FontFamily' },
    Notice => { Groups => { 2 => 'Author' } },
    # keywords with an empty definition (each gets its own hash)
    # (VVector is intentionally not extracted)
    (map { ($_ => { }) } qw(
        FontName FullName Weight Version
        EncodingScheme MappingScheme EscChar
        CharacterSet Characters IsBaseFont IsFixedV
        CapHeight XHeight Ascender Descender
    )),
);
331
#------------------------------------------------------------------------------
# Read information from a TrueType font collection (TTC) (refs 2,3)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (RAF positioned at the 'ttcf' header)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font collection
# Notes: - each member font is handed to ProcessOTF with SET_GROUP1 set to
#          "+N" (N counting from 1) while it is being processed
#        - a failure while processing one font ends the scan, but the file
#          is still reported as a TTC
sub ProcessTTC($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($hdr, $offsets);

    # 12-byte header: 'ttcf' tag, version (1.0 or 2.0) and font count
    $raf->Read($hdr, 12) == 12 or return 0;
    $hdr =~ /^ttcf\0[\x01\x02]\0\0/ or return 0;
    SetByteOrder('MM');
    my $numFonts = Get32u(\$hdr, 8);
    # might as well put a limit on the number of fonts we will parse (< 256)
    return 0 unless $numFonts < 0x100;
    # the header is followed by one 32-bit file offset per font
    my $dirLen = $numFonts * 4;
    return 0 unless $raf->Read($offsets, $dirLen) == $dirLen;

    $et->SetFileType('TTC');
    my $fastScan = $$et{OPTIONS}{FastScan};
    return 1 if $fastScan and $fastScan == 3;

    $et->HandleTag(GetTagTable('Image::ExifTool::Font::Main'), 'numfonts', $numFonts);
    # process each font of the collection in turn
    foreach my $n (1 .. $numFonts) {
        $et->VPrint(0, "Font $n:\n");
        $$et{SET_GROUP1} = "+$n";
        my $fontPos = Get32u(\$offsets, ($n - 1) * 4);
        $raf->Seek($fontPos, 0) or last;
        ProcessOTF($et, $dirInfo) or last;
    }
    delete $$et{SET_GROUP1};
    return 1;
}
364
#------------------------------------------------------------------------------
# Read information from a TrueType font file (OTF or TTF) (refs 1,2)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (RAF positioned at the font's
#            offset table; optional Base is added to every table offset)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font file
# Notes: - only the 'name' table is decoded; with the Verbose option every
#          table in the directory is also read and listed
#        - the file type is set before the table count is validated, so a
#          return of 0 is still possible after SetFileType was called
sub ProcessOTF($$)
{
    my ($et, $dirInfo) = @_;
    my $raf  = $$dirInfo{RAF};
    my $base = $$dirInfo{Base} || 0;
    my ($hdr, $dir, $data);

    # offset table: 4-byte signature, then the number of tables
    $raf->Read($hdr, 12) == 12 or return 0;
    my ($sig) = $hdr =~ /^(\0\x01\0\0|OTTO|true|typ1|\xa5(kbd|lst))[\0\x01]/;
    defined $sig or return 0;

    $et->SetFileType($sig eq 'OTTO' ? 'OTF' : 'TTF');
    my $fastScan = $$et{OPTIONS}{FastScan};
    return 1 if $fastScan and $fastScan == 3;

    SetByteOrder('MM');
    my $numTables = Get16u(\$hdr, 4);
    return 0 if $numTables <= 0 or $numTables >= 0x200;
    # table directory: 16 bytes per table (tag, checksum, offset, length)
    my $dirLen = $numTables * 16;
    $raf->Read($dir, $dirLen) == $dirLen or return 0;

    my $verbose = $et->Options('Verbose');
    my $oldIndent = $$et{INDENT};
    $$et{INDENT} .= '| ';
    $et->VerboseDir('TrueType', $numTables) if $verbose;

    foreach my $entry (0 .. $numTables - 1) {
        my $pos = $entry * 16;
        # only the 'name' table is of interest unless we are listing everything
        my $tag = substr($dir, $pos, 4);
        next unless $verbose or $tag eq 'name';
        my $offset = Get32u(\$dir, $pos + 8);
        my $size   = Get32u(\$dir, $pos + 12);
        unless ($raf->Seek($offset+$base, 0) and $raf->Read($data, $size) == $size) {
            $et->Warn("Error reading '${tag}' data");
            next;
        }
        if ($verbose) {
            # make the tag printable before listing the table
            $tag =~ s/([\0-\x1f\x80-\xff])/sprintf('\x%.2x',ord $1)/ge;
            $et->VPrint(0, sprintf("%s%d) Tag '%s' (offset 0x%.4x, %d bytes)\n",
                                   $$et{INDENT}, $entry, $tag, $offset, $size));
            $et->VerboseDump(\$data, Addr => $offset) if $verbose > 2;
            next unless $tag eq 'name';
        }
        next if $size < 8;

        # 'name' table header: format, record count, offset of string storage
        my ($format, $entries, $strStart) = map { Get16u(\$data, $_) } (0, 2, 4);
        my $recEnd = 6 + $entries * 12;     # end of the 12-byte name records
        if ($recEnd > $size) {
            $et->Warn('Truncated name record');
            last;
        }
        if ($strStart < $recEnd or $strStart > $size) {
            $et->Warn('Invalid string offset');
            last;
        }

        # parse language-tag record (in format 1 Naming table only) (ref 2)
        # (language-tag records are addressed by language IDs from 0x8000 up)
        my %langTag;
        if ($format == 1 and $recEnd + 2 <= $size) {
            my $langTags = Get16u(\$data, $recEnd);
            if ($langTags and $recEnd + 2 + $langTags * 4 < $size) {
                foreach my $n (0 .. $langTags - 1) {
                    my $rec = $recEnd + 2 + $n * 4;
                    my $langLen = Get16u(\$data, $rec);
                    # make sure the language string length is reasonable (UTF-16BE)
                    last if $langLen == 0 or $langLen & 0x01 or $langLen > 40;
                    my $langPt = Get16u(\$data, $rec + 2) + $strStart;
                    last if $langPt + $langLen > $size;
                    my $lang = substr($data, $langPt, $langLen);
                    $lang = $et->Decode($lang,'UCS2','MM','UTF8');
                    $lang =~ tr/-_a-zA-Z0-9//dc;    # remove naughty characters
                    $langTag{$n + 0x8000} = $lang;
                }
            }
        }

        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Name');
        $$et{INDENT} .= '| ';
        $et->VerboseDir('Name', $entries) if $verbose;
        foreach my $idx (0 .. $entries - 1) {
            # name record: platform, encoding, language, name ID, length, offset
            my $rec = 6 + $idx * 12;
            my ($platform, $encoding, $langID, $nameID, $strLen, $strOff) =
                map { Get16u(\$data, $rec + $_ * 2) } (0 .. 5);
            my $strPt = $strOff + $strStart;
            # silently skip strings that run past the end of the table
            next if $strPt + $strLen > $size;

            my $val = substr($data, $strPt, $strLen);
            my ($lang, $charset, $extra);
            my $sys = $ttPlatform{$platform};
            # translate from specified encoding
            if (not $sys) {
                $et->Warn("Unknown platform ($platform) for name $nameID");
            } else {
                $lang = $ttLang{$sys}{$langID} || $langTag{$langID};
                $charset = $ttCharset{$sys}{$encoding};
                if ($charset) {
                    # translate to ExifTool character set
                    $val = $et->Decode($val, $charset);
                } elsif (not defined $charset and not $$et{FontWarn}) {
                    # (warn only once, and not for a defined-but-empty charset)
                    $et->Warn("Unknown $sys character set ($encoding)");
                    $$et{FontWarn} = 1;
                }
            }
            # get the tagInfo for our specific language (use 'en' for default)
            my $tagInfo = $et->GetTagInfo($tagTablePtr, $nameID);
            if ($tagInfo and $lang and $lang ne 'en') {
                my $langInfo = Image::ExifTool::GetLangInfo($tagInfo, $lang);
                $tagInfo = $langInfo if $langInfo;
            }
            if ($verbose) {
                $langID = sprintf('0x%x', $langID) if $langID > 0x400;
                $extra = ", Plat=$platform/" . ($sys || 'Unknown') . ', ' .
                           "Enc=$encoding/" . ($charset || 'Unknown') . ', ' .
                           "Lang=$langID/" . ($lang || 'Unknown');
            }
            $et->HandleTag($tagTablePtr, $nameID, $val,
                TagInfo => $tagInfo,
                DataPt  => \$data,
                DataPos => $offset,
                Start   => $strPt,
                Size    => $strLen,
                Index   => $idx,
                Extra   => $extra,
            );
        }
        $$et{INDENT} = $oldIndent . '| ';
        # nothing else to extract once the 'name' table is done
        last unless $verbose;
    }
    $$et{INDENT} = $oldIndent;
    return 1;
}
501
#------------------------------------------------------------------------------
# Read information from an Adobe Font Metrics file (AFM, ACFM, AMFM) (ref 6)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (RAF positioned at start of file)
# Returns: 1 on success, 0 if this wasn't a recognized AFM-type file
# Notes: - only the header keywords are parsed; parsing stops at the first
#          unrecognized Start* keyword other than StartDirection
#        - consecutive Comment lines are joined with newlines and extracted
#          as a single Comment, including a run that reaches the end of file
sub ProcessAFM($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $comment);

    require Image::ExifTool::PostScript;
    local $/ = Image::ExifTool::PostScript::GetInputRecordSeparator($raf);
    # nothing to read means this can't be an AFM file
    # (checking here also avoids matching against an unset buffer)
    $raf->ReadLine($buff) or return 0;
    return 0 unless $buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/;
    my $ftyp = $1 ? ($1 eq 'Comp' ? 'ACFM' : 'AMFM') : 'AFM';
    $et->SetFileType($ftyp, 'application/x-font-afm');
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::AFM');

    for (;;) {
        $raf->ReadLine($buff) or last;
        # a non-Comment line terminates the current run of comments
        if (defined $comment and $buff !~ /^Comment\s/) {
            $et->FoundTag('Comment', $comment);
            undef $comment;
        }
        # each header line is a keyword followed by its value
        $buff =~ /^(\w+)\s+(.*?)[\x0d\x0a]/ or next;
        my ($tag, $val) = ($1, $2);
        # the creation date is stored in a specially formatted comment
        if ($tag eq 'Comment' and $val =~ /^(Creation Date):\s+(.*)/) {
            ($tag, $val) = ($1, $2);
        }
        $val =~ s/^\((.*)\)$/$1/;   # (some values may be in brackets)
        if ($tag eq 'Comment') {
            # concatenate all comments into a single value
            $comment = defined($comment) ? "$comment\n$val" : $val;
            next;
        }
        unless ($et->HandleTag($tagTablePtr, $tag, $val)) {
            # end parsing if we start any subsection
            last if $tag =~ /^Start/ and $tag ne 'StartDirection';
        }
    }
    # don't lose comments that ran right up to the end of the file
    # (when the loop ends on a Start* keyword they were already flushed above)
    $et->FoundTag('Comment', $comment) if defined $comment;
    return 1;
}
545
#------------------------------------------------------------------------------
# Read information from various format font files
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a recognized Font file
# Notes: The format is determined from the first 24 bytes of the file, and the
#        tests below are made in a fixed order (the TrueType signature
#        "\0\x01\0\0" would otherwise also pass the first PFM test).
sub ProcessFont($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($head, $devType);

    # sniff the header, then rewind for the format-specific reader
    $raf->Read($head, 24) and $raf->Seek(0,0) or return 0;

    if ($head =~ /^(\0\x01\0\0|OTTO|true|typ1)[\0\x01]/) {         # OTF, TTF
        my $result = ProcessOTF($et, $dirInfo);
        return $result;
    }
    if ($head =~ /^ttcf\0[\x01\x02]\0\0/) {                        # TTC
        my $result = ProcessTTC($et, $dirInfo);
        return $result;
    }
    if ($head =~ /^Start(Comp|Master)?FontMetrics\s+\d+/s) {       # AFM
        my $result = ProcessAFM($et, $dirInfo);
        return $result;
    }
    if ($head =~ /^(.{6})?%!(PS-(AdobeFont-|Bitstream )|FontType1-)/s) { # PFA, PFB
        # a PFB file has 6 bytes in front of the PostScript header
        if ($1) {
            $raf->Seek(6,0) and $et->SetFileType('PFB');
        }
        require Image::ExifTool::PostScript;
        my $result = Image::ExifTool::PostScript::ProcessPS($et, $dirInfo);
        return $result;
    }
    if ($head =~ /^\0[\x01\x02]/) {                                # PFM?
        # validate file size (must match the 32-bit size stored at offset 2)
        $raf->Seek(0, 2) or return 0;
        my $fileLen = $raf->Tell();
        return 0 unless $fileLen > 117 and $fileLen == unpack('x2V',$head);
        # read PFM header
        return 0 unless $raf->Seek(0,0) and $raf->Read($head,117) == 117;
        # validate "DeviceType" string (must be "PostScript\0")
        return 0 unless SetByteOrder('II') and $raf->Seek(Get32u(\$head, 101), 0);
        # the DeviceType should be "PostScript\0", but FontForge
        # incorrectly writes "Postscript\0", so ignore case
        return 0 unless $raf->Read($devType, 11) == 11 and lc($devType) eq "postscript\0";

        $et->SetFileType('PFM');
        my $fastScan = $$et{OPTIONS}{FastScan};
        return 1 if $fastScan and $fastScan == 3;
        SetByteOrder('II');
        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
        # process the PFM header
        $et->HandleTag($tagTablePtr, 'PFM', $head);
        # extract the font names (two consecutive null-terminated strings)
        my $nameOff = Get32u(\$head, 105);
        if ($raf->Seek($nameOff, 0) and $raf->Read($head, 256) and
            $head =~ /^([\x20-\xff]+)\0([\x20-\xff]+)\0/)
        {
            my ($fontName, $psName) = ($1, $2);
            $et->HandleTag($tagTablePtr, 'fontname', $fontName);
            $et->HandleTag($tagTablePtr, 'postfont', $psName);
        }
        return 1;
    }
    if ($head =~ /^(wOF[F2])/) {                                   # WOFF, WOFF2
        my $type = $1 eq 'wOFF' ? 'woff' : 'woff2';
        $et->SetFileType(uc($type), "font/$type");
        # (don't yet extract metadata from these files)
        return 1;
    }
    return 0;
}
602
6031;  # end
604
605__END__
606
607=head1 NAME
608
609Image::ExifTool::Font - Read meta information from font files
610
611=head1 SYNOPSIS
612
613This module is used by Image::ExifTool
614
615=head1 DESCRIPTION
616
617This module contains the routines required by Image::ExifTool to read meta
618information from various format font files.  Currently recognized font file
619types are OTF, TTF, TTC, DFONT, PFA, PFB, PFM, AFM, ACFM and AMFM.  As well,
620WOFF and WOFF2 font files are identified, but metadata is not currently
621extracted from these formats.
622
623=head1 AUTHOR
624
625Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
626
627This library is free software; you can redistribute it and/or modify it
628under the same terms as Perl itself.
629
630=head1 REFERENCES
631
632=over 4
633
634=item L<http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html>
635
636=item L<http://www.microsoft.com/typography/otspec/otff.htm>
637
638=item L<http://partners.adobe.com/public/developer/opentype/index_font_file.html>
639
640=item L<http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf>
641
642=item L<http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java>
643
644=item L<http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf>
645
646=back
647
648=head1 SEE ALSO
649
650L<Image::ExifTool::TagNames/Font Tags>,
651L<Image::ExifTool(3pm)|Image::ExifTool>
652
653=cut
654
655