#------------------------------------------------------------------------------
# File:         Font.pm
#
# Description:  Read meta information from font files
#
# Revisions:    2010/01/15 - P. Harvey Created
#
# References:   1) http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html
#               2) http://www.microsoft.com/typography/otspec/otff.htm
#               3) http://partners.adobe.com/public/developer/opentype/index_font_file.html
#               4) http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf
#               5) http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java
#               6) http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf
#------------------------------------------------------------------------------

package Image::ExifTool::Font;

use strict;
use vars qw($VERSION %ttLang);
use Image::ExifTool qw(:DataAccess :Utils);

$VERSION = '1.10';

sub ProcessOTF($$);

# TrueType 'name' platform codes
# (the platform name is used as the first-level key into %ttCharset and %ttLang)
my %ttPlatform = (
    0 => 'Unicode',
    1 => 'Macintosh',
    2 => 'ISO',
    3 => 'Windows',
    4 => 'Custom',
);

# convert TrueType 'name' character encoding to ExifTool Charset (ref 1/2)
# - first-level key is a platform name from %ttPlatform
# - an empty-string charset marks an encoding which is known but not decoded
#   (ProcessOTF only warns about encodings that are missing from this table)
my %ttCharset = (
    Macintosh => {
        0  => 'MacRoman',      17 => 'MacMalayalam',
        1  => 'MacJapanese',   18 => 'MacSinhalese',
        2  => 'MacChineseTW',  19 => 'MacBurmese',
        3  => 'MacKorean',     20 => 'MacKhmer',
        4  => 'MacArabic',     21 => 'MacThai',
        5  => 'MacHebrew',     22 => 'MacLaotian',
        6  => 'MacGreek',      23 => 'MacGeorgian',
        7  => 'MacCyrillic',   24 => 'MacArmenian', # 7=Russian
        8  => 'MacRSymbol',    25 => 'MacChineseCN',
        9  => 'MacDevanagari', 26 => 'MacTibetan',
        10 => 'MacGurmukhi',   27 => 'MacMongolian',
        11 => 'MacGujarati',   28 => 'MacGeez',
        12 => 'MacOriya',      29 => 'MacCyrillic', # 29=Slavic
        13 => 'MacBengali',    30 => 'MacVietnam',
        14 => 'MacTamil',      31 => 'MacSindhi',
        15 => 'MacTelugu',     32 => '',            # 32=uninterpreted
        16 => 'MacKannada',
    },
    Windows => {
        0 => 'Symbol',   4  => 'Big5',
        1 => 'UCS2',     5  => 'Wansung',
        2 => 'ShiftJIS', 6  => 'Johab',
        3 => 'PRC',      10 => 'UCS4',
    },
    Unicode => {
        # (we don't currently handle the various Unicode flavours)
        0 => 'UCS2', # Unicode 1.0 semantics
        1 => 'UCS2', # Unicode 1.1 semantics
        2 => 'UCS2', # ISO 10646 semantics
        3 => 'UCS2', # Unicode 2.0 and onwards semantics, Unicode BMP only.
        4 => 'UCS2', # Unicode 2.0 and onwards semantics, Unicode full repertoire.
      # 5 => Unicode Variation Sequences (not used in Naming table)
    },
    ISO => { # (deprecated)
        0 => 'UTF8',  # (7-bit ASCII)
        1 => 'UCS2',  # ISO 10646
        2 => 'Latin', # ISO 8859-1
    },
    Custom => { },
);

# convert TrueType 'name' language code to ExifTool language code
# - first-level key is a platform name from %ttPlatform
# - platforms with an empty table (and any code missing from a table) fall back
#   to the language-tag records that ProcessOTF reads from the 'name' table
%ttLang = (
    # Macintosh language codes (also used by QuickTime.pm)
    # oddities:
    #   49 - Cyrillic version    83 - Roman
    #   50 - Arabic version      84 - Arabic
    #  146 - with dot above
    Macintosh => {
        0  => 'en',    24 => 'lt',    48 => 'kk',    72  => 'ml',    129 => 'eu',
        1  => 'fr',    25 => 'pl',    49 => 'az',    73  => 'kn',    130 => 'ca',
        2  => 'de',    26 => 'hu',    50 => 'az',    74  => 'ta',    131 => 'la',
        3  => 'it',    27 => 'et',    51 => 'hy',    75  => 'te',    132 => 'qu',
        4  => 'nl-NL', 28 => 'lv',    52 => 'ka',    76  => 'si',    133 => 'gn',
        5  => 'sv',    29 => 'smi',   53 => 'ro',    77  => 'my',    134 => 'ay',
        6  => 'es',    30 => 'fo',    54 => 'ky',    78  => 'km',    135 => 'tt',
        7  => 'da',    31 => 'fa',    55 => 'tg',    79  => 'lo',    136 => 'ug',
        8  => 'pt',    32 => 'ru',    56 => 'tk',    80  => 'vi',    137 => 'dz',
        9  => 'no',    33 => 'zh-CN', 57 => 'mn-MN', 81  => 'id',    138 => 'jv',
        10 => 'he',    34 => 'nl-BE', 58 => 'mn-CN', 82  => 'tl',    139 => 'su',
        11 => 'ja',    35 => 'ga',    59 => 'ps',    83  => 'ms-MY', 140 => 'gl',
        12 => 'ar',    36 => 'sq',    60 => 'ku',    84  => 'ms-BN', 141 => 'af',
        13 => 'fi',    37 => 'ro',    61 => 'ks',    85  => 'am',    142 => 'br',
        14 => 'el',    38 => 'cs',    62 => 'sd',    86  => 'ti',    144 => 'gd',
        15 => 'is',    39 => 'sk',    63 => 'bo',    87  => 'om',    145 => 'gv',
        16 => 'mt',    40 => 'sl',    64 => 'ne',    88  => 'so',    146 => 'ga',
        17 => 'tr',    41 => 'yi',    65 => 'sa',    89  => 'sw',    147 => 'to',
        18 => 'hr',    42 => 'sr',    66 => 'mr',    90  => 'rw',    148 => 'el',
        19 => 'zh-TW', 43 => 'mk',    67 => 'bn',    91  => 'rn',    149 => 'kl',
        20 => 'ur',    44 => 'bg',    68 => 'as',    92  => 'ny',    150 => 'az',
        21 => 'hi',    45 => 'uk',    69 => 'gu',    93  => 'mg',
        22 => 'th',    46 => 'be',    70 => 'pa',    94  => 'eo',
        23 => 'ko',    47 => 'uz',    71 => 'or',    128 => 'cy',
    },
    # Windows language codes (http://msdn.microsoft.com/en-us/library/0h88fahh(VS.85).aspx)
    # Notes: This isn't an exact science. The reference above gives language codes
    # which are different from some ISO 639-1 numbers. Also, some Windows language
    # codes don't appear to have ISO 639-1 equivalents.
    #  0x0428 - fa by ref above
    #  0x048c - no ISO equivalent
    #  0x081a/0x83c - sr-SP
    #  0x0c0a - modern?
    #  0x2409 - Caribbean country code not found in ISO 3166-1
    Windows => {
        0x0401 => 'ar-SA', 0x0438 => 'fo',    0x0481 => 'mi',     0x1409 => 'en-NZ',
        0x0402 => 'bg',    0x0439 => 'hi',    0x0482 => 'oc',     0x140a => 'es-CR',
        0x0403 => 'ca',    0x043a => 'mt',    0x0483 => 'co',     0x140c => 'fr-LU',
        0x0404 => 'zh-TW', 0x043b => 'se-NO', 0x0484 => 'gsw',    0x141a => 'bs-BA',
        0x0405 => 'cs',    0x043c => 'gd',    0x0485 => 'sah',    0x143b => 'smj-SE',
        0x0406 => 'da',    0x043d => 'yi',    0x0486 => 'ny',     0x1801 => 'ar-MA',
        0x0407 => 'de-DE', 0x043e => 'ms-MY', 0x0487 => 'rw',     0x1809 => 'en-IE',
        0x0408 => 'el',    0x043f => 'kk',    0x048c => 'Dari',   0x180a => 'es-PA',
        0x0409 => 'en-US', 0x0440 => 'ky',    0x0801 => 'ar-IQ',  0x180c => 'fr-MC',
        0x040a => 'es-ES', 0x0441 => 'sw',    0x0804 => 'zh-CN',  0x181a => 'sr-BA',
        0x040b => 'fi',    0x0442 => 'tk',    0x0807 => 'de-CH',  0x183b => 'sma-NO',
        0x040c => 'fr-FR', 0x0443 => 'uz-UZ', 0x0809 => 'en-GB',  0x1c01 => 'ar-TN',
        0x040d => 'he',    0x0444 => 'tt',    0x080a => 'es-MX',  0x1c09 => 'en-ZA',
        0x040e => 'hu',    0x0445 => 'bn-IN', 0x080c => 'fr-BE',  0x1c0a => 'es-DO',
        0x040f => 'is',    0x0446 => 'pa',    0x0810 => 'it-CH',  0x1c1a => 'sr-BA',
        0x0410 => 'it-IT', 0x0447 => 'gu',    0x0813 => 'nl-BE',  0x1c3b => 'sma-SE',
        0x0411 => 'ja',    0x0448 => 'wo',    0x0814 => 'nn',     0x2001 => 'ar-OM',
        0x0412 => 'ko',    0x0449 => 'ta',    0x0816 => 'pt-PT',  0x2009 => 'en-JM',
        0x0413 => 'nl-NL', 0x044a => 'te',    0x0818 => 'ro-MO',  0x200a => 'es-VE',
        0x0414 => 'no-NO', 0x044b => 'kn',    0x0819 => 'ru-MO',  0x201a => 'bs-BA',
        0x0415 => 'pl',    0x044c => 'ml',    0x081a => 'sr-RS',  0x203b => 'sms',
        0x0416 => 'pt-BR', 0x044d => 'as',    0x081d => 'sv-FI',  0x2401 => 'ar-YE',
        0x0417 => 'rm',    0x044e => 'mr',    0x082c => 'az-AZ',  0x2409 => 'en-CB',
        0x0418 => 'ro',    0x044f => 'sa',    0x082e => 'dsb',    0x240a => 'es-CO',
        0x0419 => 'ru',    0x0450 => 'mn-MN', 0x083b => 'se-SE',  0x243b => 'smn',
        0x041a => 'hr',    0x0451 => 'bo',    0x083c => 'ga',     0x2801 => 'ar-SY',
        0x041b => 'sk',    0x0452 => 'cy',    0x083e => 'ms-BN',  0x2809 => 'en-BZ',
        0x041c => 'sq',    0x0453 => 'km',    0x0843 => 'uz-UZ',  0x280a => 'es-PE',
        0x041d => 'sv-SE', 0x0454 => 'lo',    0x0845 => 'bn-BD',  0x2c01 => 'ar-JO',
        0x041e => 'th',    0x0456 => 'gl',    0x0850 => 'mn-CN',  0x2c09 => 'en-TT',
        0x041f => 'tr',    0x0457 => 'kok',   0x085d => 'iu-CA',  0x2c0a => 'es-AR',
        0x0420 => 'ur',    0x045a => 'syr',   0x085f => 'tmh',    0x3001 => 'ar-LB',
        0x0421 => 'id',    0x045b => 'si',    0x086b => 'qu-EC',  0x3009 => 'en-ZW',
        0x0422 => 'uk',    0x045d => 'iu-CA', 0x0c01 => 'ar-EG',  0x300a => 'es-EC',
        0x0423 => 'be',    0x045e => 'am',    0x0c04 => 'zh-HK',  0x3401 => 'ar-KW',
        0x0424 => 'sl',    0x0461 => 'ne',    0x0c07 => 'de-AT',  0x3409 => 'en-PH',
        0x0425 => 'et',    0x0462 => 'fy',    0x0c09 => 'en-AU',  0x340a => 'es-CL',
        0x0426 => 'lv',    0x0463 => 'ps',    0x0c0a => 'es-ES',  0x3801 => 'ar-AE',
        0x0427 => 'lt',    0x0464 => 'fil',   0x0c0c => 'fr-CA',  0x380a => 'es-UY',
        0x0428 => 'tg',    0x0465 => 'dv',    0x0c1a => 'sr-RS',  0x3c01 => 'ar-BH',
        0x042a => 'vi',    0x0468 => 'ha',    0x0c3b => 'se-FI',  0x3c0a => 'es-PY',
        0x042b => 'hy',    0x046a => 'yo',    0x0c6b => 'qu-PE',  0x4001 => 'ar-QA',
        0x042c => 'az-AZ', 0x046b => 'qu-BO', 0x1001 => 'ar-LY',  0x4009 => 'en-IN',
        0x042d => 'eu',    0x046c => 'st',    0x1004 => 'zh-SG',  0x400a => 'es-BO',
        0x042e => 'hsb',   0x046d => 'ba',    0x1007 => 'de-LU',  0x4409 => 'en-MY',
        0x042f => 'mk',    0x046e => 'lb',    0x1009 => 'en-CA',  0x440a => 'es-SV',
        0x0430 => 'st',    0x046f => 'kl',    0x100a => 'es-GT',  0x4809 => 'en-SG',
        0x0431 => 'ts',    0x0470 => 'ig',    0x100c => 'fr-CH',  0x480a => 'es-HN',
        0x0432 => 'tn',    0x0478 => 'yi',    0x101a => 'hr-BA',  0x4c0a => 'es-NI',
        0x0434 => 'xh',    0x047a => 'arn',   0x103b => 'smj-NO', 0x500a => 'es-PR',
        0x0435 => 'zu',    0x047c => 'moh',   0x1401 => 'ar-DZ',  0x540a => 'es-US',
        0x0436 => 'af',    0x047e => 'br',    0x1404 => 'zh-MO',
        0x0437 => 'ka',    0x0480 => 'ug',    0x1407 => 'de-LI',
    },
    Unicode => { },
    ISO     => { },
    Custom  => { },
);

# eclectic table of tags for various format font files
# (the lower-case keys are the tag IDs passed to HandleTag by the routines in
# this module; the SubDirectory entries hand off to the format-specific tables)
%Image::ExifTool::Font::Main = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        This table contains a collection of tags found in font files of various
        formats. ExifTool currently recognizes OTF, TTF, TTC, DFONT, PFA, PFB, PFM,
        AFM, ACFM and AMFM font files.
    },
    name => {
        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
    },
    PFM  => {
        Name => 'PFMHeader',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PFM' },
    },
    PSInfo => {
        Name => 'PSFontInfo',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PSInfo' },
    },
    AFM => {
        Name => 'AFM',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::AFM' },
    },
    numfonts => 'NumFonts',
    fontname => 'FontName',
    postfont => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
);

# TrueType name tags (ref 1/2)
# (tag IDs are the numerical nameID values of the 'name' table records)
%Image::ExifTool::Font::Name = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        The following tags are extracted from the TrueType font "name" table found
        in OTF, TTF, TTC and DFONT files. These tags support localized languages by
        adding a hyphen followed by a language code to the end of the tag name (eg.
        "Copyright-fr" or "License-en-US"). Tags with no language code use the
        default language of "en".
    },
    0  => { Name => 'Copyright', Groups => { 2 => 'Author' } },
    1  => 'FontFamily',
    2  => 'FontSubfamily',
    3  => 'FontSubfamilyID',
    4  => 'FontName', # full name
    5  => 'NameTableVersion',
    6  => { Name => 'PostScriptFontName', Description => 'PostScript Font Name' },
    7  => 'Trademark',
    8  => 'Manufacturer',
    9  => 'Designer',
    10 => 'Description',
    11 => 'VendorURL',
    12 => 'DesignerURL',
    13 => 'License',
    14 => 'LicenseInfoURL',
    16 => 'PreferredFamily',
    17 => 'PreferredSubfamily',
    18 => 'CompatibleFontName',
    19 => 'SampleText',
    20 => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
    21 => 'WWSFamilyName',
    22 => 'WWSSubfamilyName',
);

# PostScript Font Metric file header (ref 4)
# (tag IDs are byte offsets into the 117-byte header read by ProcessFont;
# entries without an explicit Format use the ProcessBinaryData default,
# presumably int8u -- TODO confirm)
%Image::ExifTool::Font::PFM = (
    GROUPS => { 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    NOTES => 'Tags extracted from the PFM file header.',
    0 => {
        Name => 'PFMVersion',
        Format => 'int16u',
        PrintConv => 'sprintf("%x.%.2x",$val>>8,$val&0xff)',
    },
    6  => { Name => 'Copyright',       Format => 'string[60]', Groups => { 2 => 'Author' } },
    66 => { Name => 'FontType',        Format => 'int16u' },
    68 => { Name => 'PointSize',       Format => 'int16u' },
    70 => { Name => 'YResolution',     Format => 'int16u' },
    72 => { Name => 'XResolution',     Format => 'int16u' },
    74 => { Name => 'Ascent',          Format => 'int16u' },
    76 => { Name => 'InternalLeading', Format => 'int16u' },
    78 => { Name => 'ExternalLeading', Format => 'int16u' },
    80 => { Name => 'Italic' },
    81 => { Name => 'Underline' },
    82 => { Name => 'Strikeout' },
    83 => { Name => 'Weight',          Format => 'int16u' },
    85 => { Name => 'CharacterSet' },
    86 => { Name => 'PixWidth',        Format => 'int16u' },
    88 => { Name => 'PixHeight',       Format => 'int16u' },
    90 => { Name => 'PitchAndFamily' },
    91 => { Name => 'AvgWidth',        Format => 'int16u' },
    93 => { Name => 'MaxWidth',        Format => 'int16u' },
    95 => { Name => 'FirstChar' },
    96 => { Name => 'LastChar' },
    97 => { Name => 'DefaultChar' },
    98 => { Name => 'BreakChar' },
    99 => { Name => 'WidthBytes',      Format => 'int16u' },
  # 101 => { Name => 'DeviceTypeOffset', Format => 'int32u' },
  # 105 => { Name => 'FontNameOffset',   Format => 'int32u' },
  # 109 => { Name => 'BitsPointer',      Format => 'int32u' },
  # 113 => { Name => 'BitsOffset',       Format => 'int32u' },
);

# PostScript FontInfo attributes (PFA, PFB) (ref PH)
%Image::ExifTool::Font::PSInfo = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from PostScript font files (PFA and PFB).',
    FullName    => { },
    FamilyName  => { Name => 'FontFamily' },
    Weight      => { },
    ItalicAngle => { },
    isFixedPitch=> { },
    UnderlinePosition  => { },
    UnderlineThickness => { },
    Copyright   => { Groups => { 2 => 'Author' } },
    Notice      => { Groups => { 2 => 'Author' } },
    version     => { },
    FontName    => { },
    FontType    => { },
    FSType      => { },
);

# Adobe Font Metrics tags (AFM) (ref 6)
# (tag IDs are the keywords found at the start of each line of the file, except
# for 'Creation Date' which ProcessAFM extracts from a "Comment" line)
%Image::ExifTool::Font::AFM = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from Adobe Font Metrics files (AFM, ACFM and AMFM).',
   'Creation Date' => { Name => 'CreateDate', Groups => { 2 => 'Time' } },
    FontName    => { },
    FullName    => { },
    FamilyName  => { Name => 'FontFamily' },
    Weight      => { },
    Version     => { },
    Notice      => { Groups => { 2 => 'Author' } },
    EncodingScheme => { },
    MappingScheme  => { },
    EscChar     => { },
    CharacterSet=> { },
    Characters  => { },
    IsBaseFont  => { },
  # VVector     => { },
    IsFixedV    => { },
    CapHeight   => { },
    XHeight     => { },
    Ascender    => { },
    Descender   => { },
);

#------------------------------------------------------------------------------
# Read information from a TrueType font collection (TTC) (refs 2,3)
# Inputs: 0) ExifTool ref,
#         1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a valid TrueType font collection
# Notes: Reads the 12-byte TTC header from the current RAF position, then calls
#        ProcessOTF for each font listed in the offset table.  SET_GROUP1 is
#        set to "+N" while processing font number N and removed when done.
sub ProcessTTC($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $i);

    return 0 unless $raf->Read($buff, 12) == 12;
    return 0 unless $buff =~ /^ttcf\0[\x01\x02]\0\0/;
    SetByteOrder('MM');
    my $num = Get32u(\$buff, 8);
    # might as well put a limit on the number of fonts we will parse (< 256)
    # (also reads the table of 4-byte font offsets which follows the header)
    return 0 unless $num < 0x100 and $raf->Read($buff, $num * 4) == $num * 4;
    $et->SetFileType('TTC');
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
    $et->HandleTag($tagTablePtr, 'numfonts', $num);
    # loop through all fonts in the collection
    for ($i=0; $i<$num; ++$i) {
        my $n = $i + 1;
        $et->VPrint(0, "Font $n:\n");
        $$et{SET_GROUP1} = "+$n";
        my $offset = Get32u(\$buff, $i * 4);
        # stop at the first font that can't be located or parsed
        $raf->Seek($offset, 0) or last;
        ProcessOTF($et, $dirInfo) or last;
    }
    delete $$et{SET_GROUP1};
    return 1;
}

#------------------------------------------------------------------------------
# Read information from a TrueType font file (OTF or TTF) (refs 1,2)
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a valid TrueType font file
# Notes: Reads from the current RAF position.  Table offsets found in the font
#        directory are taken relative to $$dirInfo{Base} (0 if not set).  Only
#        the 'name' table is parsed for tags; the other tables are read only
#        when the Verbose option is set, so that they can be listed and dumped.
sub ProcessOTF($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($tbl, $buff, $pos, $i);
    my $base = $$dirInfo{Base} || 0;

    return 0 unless $raf->Read($buff, 12) == 12;
    return 0 unless $buff =~ /^(\0\x01\0\0|OTTO|true|typ1|\xa5(kbd|lst))[\0\x01]/;

    $et->SetFileType($1 eq 'OTTO' ? 'OTF' : 'TTF');
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    SetByteOrder('MM');
    my $numTables = Get16u(\$buff, 4);
    return 0 unless $numTables > 0 and $numTables < 0x200;
    # the table directory has one 16-byte entry per table
    my $len = $numTables * 16;
    return 0 unless $raf->Read($tbl, $len) == $len;

    my $verbose = $et->Options('Verbose');
    my $oldIndent = $$et{INDENT};
    $$et{INDENT} .= '| ';
    $et->VerboseDir('TrueType', $numTables) if $verbose;

    for ($pos=0; $pos<$len; $pos+=16) {
        # look for 'name' table
        my $tag = substr($tbl, $pos, 4);
        next unless $tag eq 'name' or $verbose;
        my $offset = Get32u(\$tbl, $pos + 8);
        my $size   = Get32u(\$tbl, $pos + 12);
        unless ($raf->Seek($offset+$base, 0) and $raf->Read($buff, $size) == $size) {
            $et->Warn("Error reading '${tag}' data");
            next;
        }
        if ($verbose) {
            # escape non-printable characters in the tag ID before printing
            $tag =~ s/([\0-\x1f\x80-\xff])/sprintf('\x%.2x',ord $1)/ge;
            my $str = sprintf("%s%d) Tag '%s' (offset 0x%.4x, %d bytes)\n",
                              $$et{INDENT}, $pos/16, $tag, $offset, $size);
            $et->VPrint(0, $str);
            $et->VerboseDump(\$buff, Addr => $offset) if $verbose > 2;
            next unless $tag eq 'name';
        }
        next unless $size >= 8;
        # name table header: format (0), record count (2), string offset (4),
        # followed by one 12-byte record per name
        my $entries = Get16u(\$buff, 2);
        my $recEnd = 6 + $entries * 12;
        if ($recEnd > $size) {
            $et->Warn('Truncated name record');
            last;
        }
        my $strStart = Get16u(\$buff, 4);
        if ($strStart < $recEnd or $strStart > $size) {
            $et->Warn('Invalid string offset');
            last;
        }
        # parse language-tag record (in format 1 Naming table only) (ref 2)
        # (stored in %langTag keyed by language ID, starting at 0x8000)
        my %langTag;
        if (Get16u(\$buff, 0) == 1 and $recEnd + 2 <= $size) {
            my $langTags = Get16u(\$buff, $recEnd);
            if ($langTags and $recEnd + 2 + $langTags * 4 < $size) {
                for ($i=0; $i<$langTags; ++$i) {
                    my $pt = $recEnd + 2 + $i * 4;
                    my $langLen = Get16u(\$buff, $pt);
                    # make sure the language string length is reasonable (UTF-16BE)
                    last if $langLen == 0 or $langLen & 0x01 or $langLen > 40;
                    my $langPt = Get16u(\$buff, $pt + 2) + $strStart;
                    last if $langPt + $langLen > $size;
                    my $lang = substr($buff, $langPt, $langLen);
                    $lang = $et->Decode($lang,'UCS2','MM','UTF8');
                    $lang =~ tr/-_a-zA-Z0-9//dc;    # remove naughty characters
                    $langTag{$i + 0x8000} = $lang;
                }
            }
        }
        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Name');
        $$et{INDENT} .= '| ';
        $et->VerboseDir('Name', $entries) if $verbose;
        for ($i=0; $i<$entries; ++$i) {
            my $pt = 6 + $i * 12;
            my $platform = Get16u(\$buff, $pt);
            my $encoding = Get16u(\$buff, $pt + 2);
            my $langID   = Get16u(\$buff, $pt + 4);
            my $nameID   = Get16u(\$buff, $pt + 6);
            my $strLen   = Get16u(\$buff, $pt + 8);
            my $strPt    = Get16u(\$buff, $pt + 10) + $strStart;
            # (names whose string extends past the end of the table are ignored)
            if ($strPt + $strLen <= $size) {
                my $val = substr($buff, $strPt, $strLen);
                my ($lang, $charset, $extra);
                my $sys = $ttPlatform{$platform};
                # translate from specified encoding
                if ($sys) {
                    $lang = $ttLang{$sys}{$langID} || $langTag{$langID};
                    $charset = $ttCharset{$sys}{$encoding};
                    if (not $charset) {
                        # warn (once only) about encodings missing from the table,
                        # but not about those deliberately mapped to an empty string
                        if (not defined $charset and not $$et{FontWarn}) {
                            $et->Warn("Unknown $sys character set ($encoding)");
                            $$et{FontWarn} = 1;
                        }
                    } else {
                        # translate to ExifTool character set
                        $val = $et->Decode($val, $charset);
                    }
                } else {
                    $et->Warn("Unknown platform ($platform) for name $nameID");
                }
                # get the tagInfo for our specific language (use 'en' for default)
                my $tagInfo = $et->GetTagInfo($tagTablePtr, $nameID);
                if ($tagInfo and $lang and $lang ne 'en') {
                    my $langInfo = Image::ExifTool::GetLangInfo($tagInfo, $lang);
                    $tagInfo = $langInfo if $langInfo;
                }
                if ($verbose) {
                    $langID > 0x400 and $langID = sprintf('0x%x', $langID);
                    $extra = ", Plat=$platform/" . ($sys || 'Unknown') . ', ' .
                               "Enc=$encoding/" . ($charset || 'Unknown') . ', ' .
                               "Lang=$langID/" . ($lang || 'Unknown');
                }
                $et->HandleTag($tagTablePtr, $nameID, $val,
                    TagInfo => $tagInfo,
                    DataPt  => \$buff,
                    DataPos => $offset,
                    Start   => $strPt,
                    Size    => $strLen,
                    Index   => $i,
                    Extra   => $extra,
                );
            }
        }
        $$et{INDENT} = $oldIndent . '| ';
        # the 'name' table is all we need unless listing tables in verbose mode
        last unless $verbose;
    }
    $$et{INDENT} = $oldIndent;
    return 1;
}

#------------------------------------------------------------------------------
# Read information from an Adobe Font Metrics file (AFM, ACFM, AMFM) (ref 6)
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a recognized AFM-type file
# Notes: Consecutive "Comment" lines are joined with newlines and extracted as
#        a single Comment tag, except for "Comment Creation Date: ..." which is
#        extracted separately.  Parsing stops at the first "Start..." section
#        (other than StartDirection) which isn't a known tag.
sub ProcessAFM($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $comment);

    require Image::ExifTool::PostScript;
    local $/ = Image::ExifTool::PostScript::GetInputRecordSeparator($raf);
    $raf->ReadLine($buff);
    return 0 unless $buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/;
    my $ftyp = $1 ? ($1 eq 'Comp' ? 'ACFM' : 'AMFM') : 'AFM';
    $et->SetFileType($ftyp, 'application/x-font-afm');
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::AFM');

    for (;;) {
        $raf->ReadLine($buff) or last;
        # save the accumulated comment once we reach a non-comment line
        if (defined $comment and $buff !~ /^Comment\s/) {
            $et->FoundTag('Comment', $comment);
            undef $comment;
        }
        # split the line into a keyword and its value
        $buff =~ /^(\w+)\s+(.*?)[\x0d\x0a]/ or next;
        my ($tag, $val) = ($1, $2);
        if ($tag eq 'Comment' and $val =~ /^(Creation Date):\s+(.*)/) {
            ($tag, $val) = ($1, $2);
        }
        $val =~ s/^\((.*)\)$/$1/;   # (some values may be in brackets)
        if ($tag eq 'Comment') {
            # concatenate all comments into a single value
            $comment = defined($comment) ? "$comment\n$val" : $val;
            next;
        }
        unless ($et->HandleTag($tagTablePtr, $tag, $val)) {
            # end parsing if we start any subsection
            last if $tag =~ /^Start/ and $tag ne 'StartDirection';
        }
    }
    # save any comment still pending (happens only when the last line read was
    # a Comment, eg. a truncated file), otherwise it would be silently lost
    $et->FoundTag('Comment', $comment) if defined $comment;
    return 1;
}

#------------------------------------------------------------------------------
# Read information from various format font files
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a recognized Font file
# Notes: The format is determined from the first 24 bytes of the file, and the
#        file is rewound before dispatching to the format-specific routine.
#        PFM files have no unique signature, so they are validated through the
#        file size stored in the header and the DeviceType string.
sub ProcessFont($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $buf2, $rtnVal);
    return 0 unless $raf->Read($buff, 24) and $raf->Seek(0,0);
    if ($buff =~ /^(\0\x01\0\0|OTTO|true|typ1)[\0\x01]/) {        # OTF, TTF
        $rtnVal = ProcessOTF($et, $dirInfo);
    } elsif ($buff =~ /^ttcf\0[\x01\x02]\0\0/) {                    # TTC
        $rtnVal = ProcessTTC($et, $dirInfo);
    } elsif ($buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/s) {   # AFM
        $rtnVal = ProcessAFM($et, $dirInfo);
    } elsif ($buff =~ /^(.{6})?%!(PS-(AdobeFont-|Bitstream )|FontType1-)/s) {# PFA, PFB
        # (a 6-byte header before the PostScript signature indicates PFB;
        # skip over it before parsing the PostScript)
        $raf->Seek(6,0) and $et->SetFileType('PFB') if $1;
        require Image::ExifTool::PostScript;
        $rtnVal = Image::ExifTool::PostScript::ProcessPS($et, $dirInfo);
    } elsif ($buff =~ /^\0[\x01\x02]/ and $raf->Seek(0, 2) and      # PFM
            # validate file size
            $raf->Tell() > 117 and $raf->Tell() == unpack('x2V',$buff) and
            # read PFM header
            $raf->Seek(0,0) and $raf->Read($buff,117) == 117 and
            # validate "DeviceType" string (must be "PostScript\0")
            SetByteOrder('II') and $raf->Seek(Get32u(\$buff, 101), 0) and
            # the DeviceType should be "PostScript\0", but FontForge
            # incorrectly writes "Postscript\0", so ignore case
            $raf->Read($buf2, 11) == 11 and lc($buf2) eq "postscript\0")
    {
        $et->SetFileType('PFM');
        return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
        SetByteOrder('II');
        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
        # process the PFM header
        $et->HandleTag($tagTablePtr, 'PFM', $buff);
        # extract the font names
        # (two consecutive null-terminated strings at the font name offset)
        my $nameOff = Get32u(\$buff, 105);
        if ($raf->Seek($nameOff, 0) and $raf->Read($buff, 256) and
            $buff =~ /^([\x20-\xff]+)\0([\x20-\xff]+)\0/)
        {
            $et->HandleTag($tagTablePtr, 'fontname', $1);
            $et->HandleTag($tagTablePtr, 'postfont', $2);
        }
        $rtnVal = 1;
    } elsif ($buff =~ /^(wOF[F2])/) {
        my $type = $1 eq 'wOFF' ? 'woff' : 'woff2';
        $et->SetFileType(uc($type), "font/$type");
        # (don't yet extract metadata from these files)
        $rtnVal = 1;
    } else {
        $rtnVal = 0;
    }
    return $rtnVal;
}

1;  # end

__END__

=head1 NAME

Image::ExifTool::Font - Read meta information from font files

=head1 SYNOPSIS

This module is used by Image::ExifTool

=head1 DESCRIPTION

This module contains the routines required by Image::ExifTool to read meta
information from various format font files. Currently recognized font file
types are OTF, TTF, TTC, DFONT, PFA, PFB, PFM, AFM, ACFM and AMFM. As well,
WOFF and WOFF2 font files are identified, but metadata is not currently
extracted from these formats.

=head1 AUTHOR

Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 REFERENCES

=over 4

=item L<http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html>

=item L<http://www.microsoft.com/typography/otspec/otff.htm>

=item L<http://partners.adobe.com/public/developer/opentype/index_font_file.html>

=item L<http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf>

=item L<http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java>

=item L<http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf>

=back

=head1 SEE ALSO

L<Image::ExifTool::TagNames/Font Tags>,
L<Image::ExifTool(3pm)|Image::ExifTool>

=cut