1 {
2 *****************************************************************************
3 This file is part of the Lazarus Component Library (LCL)
4
5 See the file COPYING.modifiedLGPL.txt, included in this distribution,
6 for details about the license.
7 *****************************************************************************
8
9 Author: Jesus Reyes Aguilar
10 }
11 unit PostScriptUnicode;
12
13 {$mode objfpc}{$H+}
14
15 interface
16
17 uses
18 Classes, SysUtils, Math,
19 // LazUtils
20 Maps;
21
22 type
23 TUnicodeBlock = record
24 Ini, Fin, PSCount: Integer;
25 end;
26
27 PGlyph = ^TGlyph;
28 TGlyph = record
29 Code: Word;
30 Name: string[21];
31 end;
32
33 // TODO: modify GlyphsArr sorted by Name
34 {$i glyphlist.inc}
35
36 type
37
38 { TPsUnicode }
39
40 TPsUnicode = class
41 private
42 FFontSize: Integer;
43 FFontStyle: Integer;
44 FGlyphs: TMap;
45 FBlocks: array of TUnicodeBlock;
46 FEncodings: array of Integer;
47 FOutLst, FBaseFonts,FEncodedFonts,FUsedFonts: TStringList;
48 FLastFontIndex: Integer;
49 FFont: string;
50 procedure CountPSChars;
51 procedure CreateGlyphMap;
52 procedure CreateUniCodeBlocks;
FindEncodingIndexnull53 function FindEncodingIndex(ABlock: Integer): Integer;
IndexOfFontnull54 function IndexOfFont(AFontName:string; AFontSize,AFontStyle,ABlock:Integer): Integer;
SelectFontnull55 function SelectFont(AFontName:string; AFontSize,AFontStyle,ABlock:Integer): string;
56 procedure ReportBlockEncoding(i:Integer);
57 procedure SetFont(const AValue: string);
58 procedure SetFontSize(const AValue: Integer);
59 procedure SetFontStyle(const AValue: Integer);
60 public
61 constructor create;
62 destructor destroy; override;
63 procedure OutputString(S:string);
BlockFornull64 function BlockFor(var w: word):integer;
65 procedure ResetLastFont;
UnicodeToGlyphnull66 function UnicodeToGlyph(w: word): string;
67 property Font: string read FFont write SetFont;
68 property FontSize: Integer read FFontSize write SetFontSize;
69 property FOntStyle: Integer read FFontStyle write SetFontStyle;
70 property OutLst: TStringlist read FOutLst write FOutLst;
71 end;
72
73 implementation
74
Octalnull75 function Octal(c : byte) : string;
76 begin
77 result := '\' +
78 char( ord('0') + (c div 64) ) +
79 char( ord('0') + (c mod 64) div 8 ) +
80 Char( ord('0') + (c mod 8 ) );
81 end;
82
83 { TPsUnicode }
84
85 procedure TPsUnicode.CreateGlyphMap;
86 var
87 i: word;
88 begin
89
90 if FGlyphs<>nil then
91 exit;
92
93 FGlyphs := TMap.Create(itu2, SizeOf(word));
94 for i:=0 to GLYPHCOUNT-1 do
95 FGlyphs.Add(GlyphsArr[i].Code, i);
96
97 CountPSChars;
98 end;
99
100 procedure TPsUnicode.CreateUniCodeBlocks;
101 procedure AddBlock(Ini,Fin:Integer);
102 var
103 i: Integer;
104 begin
105 i := Length(FBlocks);
106 SetLength(FBlocks, i+1);
107 FBlocks[i].Ini:=Ini;
108 FBlocks[i].Fin:=Fin;
109 end;
110 begin
111 if Length(FBlocks)>0 then
112 exit;
113 //(^([A-Z0-9 \-]+)*).U\+([A-F0-9]+).U\+([A-F0-9]+)
114 // AddBlock\(\$$3,\$$4); // $1
115
116 // Following two blocks are merged into one
117 //AddBlock($0000,$007F); // Basic Latin (128)
118 //AddBlock($0080,$00FF); // Latin-1 Supplement (128)
119 AddBlock($0000,$00FF); // Basic Latin + Latin1 sup (256)
120
121 AddBlock($0100,$017F); // Latin Extended-A (128)
122 AddBlock($0180,$024F); // Latin Extended-B (208)
123 AddBlock($0250,$02AF); // IPA Extensions (96)
124 AddBlock($02B0,$02FF); // Spacing Modifier Letters (80)
125 AddBlock($0300,$036F); // Combining Diacritical Marks (112)
126 AddBlock($0370,$03FF); // Greek and Coptic (134)
127 AddBlock($0400,$04FF); // Cyrillic (256)
128 AddBlock($0500,$052F); // Cyrillic Supplement (36)
129 AddBlock($0530,$058F); // Armenian (86)
130 AddBlock($0590,$05FF); // Hebrew (87)
131 AddBlock($0600,$06FF); // Arabic (250)
132 AddBlock($0700,$074F); // Syriac (77)
133 AddBlock($0750,$077F); // Arabic Supplement (48)
134 AddBlock($0780,$07BF); // Thaana (50)
135 AddBlock($07C0,$07FF); // NKo (59)
136 AddBlock($0900,$097F); // Devanagari (112)
137 AddBlock($0980,$09FF); // Bengali (91)
138 AddBlock($0A00,$0A7F); // Gurmukhi (79)
139 AddBlock($0A80,$0AFF); // Gujarati (83)
140 AddBlock($0B00,$0B7F); // Oriya (84)
141 AddBlock($0B80,$0BFF); // Tamil (72)
142 AddBlock($0C00,$0C7F); // Telugu (93)
143 AddBlock($0C80,$0CFF); // Kannada (86)
144 AddBlock($0D00,$0D7F); // Malayalam (95)
145 AddBlock($0D80,$0DFF); // Sinhala (80)
146 AddBlock($0E00,$0E7F); // Thai (87)
147 AddBlock($0E80,$0EFF); // Lao (65)
148 AddBlock($0F00,$0FFF); // Tibetan (201)
149 AddBlock($1000,$109F); // Myanmar (156)
150 AddBlock($10A0,$10FF); // Georgian (83)
151 AddBlock($1100,$11FF); // Hangul Jamo (240)
152 AddBlock($1200,$137F); // Ethiopic (356)
153 AddBlock($1380,$139F); // Ethiopic Supplement (26)
154 AddBlock($13A0,$13FF); // Cherokee (85)
155 AddBlock($1400,$167F); // Unified Canadian Aboriginal Syllabics (630)
156 AddBlock($1680,$169F); // Ogham (29)
157 AddBlock($16A0,$16FF); // Runic (81)
158 AddBlock($1700,$171F); // Tagalog (20)
159 AddBlock($1720,$173F); // Hanunoo (23)
160 AddBlock($1740,$175F); // Buhid (20)
161 AddBlock($1760,$177F); // Tagbanwa (18)
162 AddBlock($1780,$17FF); // Khmer (114)
163 AddBlock($1800,$18AF); // Mongolian (156)
164 AddBlock($1900,$194F); // Limbu (66)
165 AddBlock($1950,$197F); // Tai Le (35)
166 AddBlock($1980,$19DF); // New Tai Lue (80)
167 AddBlock($19E0,$19FF); // Khmer Symbols (32)
168 AddBlock($1A00,$1A1F); // Buginese (30)
169 AddBlock($1B00,$1B7F); // Balinese (121)
170 AddBlock($1B80,$1BBF); // Sundanese (55)
171 AddBlock($1C00,$1C4F); // Lepcha (74)
172 AddBlock($1C50,$1C7F); // Ol Chiki (48)
173 AddBlock($1D00,$1D7F); // Phonetic Extensions (128)
174 AddBlock($1D80,$1DBF); // Phonetic Extensions Supplement (64)
175 AddBlock($1DC0,$1DFF); // Combining Diacritical Marks Supplement (41)
176 AddBlock($1E00,$1EFF); // Latin Extended Additional (256)
177 AddBlock($1F00,$1FFF); // Greek Extended (233)
178 AddBlock($2000,$206F); // General Punctuation (107)
179 AddBlock($2070,$209F); // Superscripts and Subscripts (34)
180 AddBlock($20A0,$20CF); // Currency Symbols (22)
181 AddBlock($20D0,$20FF); // Combining Diacritical Marks for Symbols (33)
182 AddBlock($2100,$214F); // Letterlike Symbols (80)
183 AddBlock($2150,$218F); // Number Forms (54)
184 AddBlock($2190,$21FF); // Arrows (112)
185 AddBlock($2200,$22FF); // Mathematical Operators (256)
186 AddBlock($2300,$23FF); // Miscellaneous Technical (232)
187 AddBlock($2400,$243F); // Control Pictures (39)
188 AddBlock($2440,$245F); // Optical Character Recognition (11)
189 AddBlock($2460,$24FF); // Enclosed Alphanumerics (160)
190 AddBlock($2500,$257F); // Box Drawing (128)
191 AddBlock($2580,$259F); // Block Elements (32)
192 AddBlock($25A0,$25FF); // Geometric Shapes (96)
193 AddBlock($2600,$26FF); // Miscellaneous Symbols (191)
194 AddBlock($2700,$27BF); // Dingbats (174)
195 AddBlock($27C0,$27EF); // Miscellaneous Mathematical Symbols-A (44)
196 AddBlock($27F0,$27FF); // Supplemental Arrows-A (16)
197 AddBlock($2800,$28FF); // Braille Patterns (256)
198 AddBlock($2900,$297F); // Supplemental Arrows-B (128)
199 AddBlock($2980,$29FF); // Miscellaneous Mathematical Symbols-B (128)
200 AddBlock($2A00,$2AFF); // Supplemental Mathematical Operators (256)
201 AddBlock($2B00,$2BFF); // Miscellaneous Symbols and Arrows (82)
202 AddBlock($2C00,$2C5F); // Glagolitic (94)
203 AddBlock($2C60,$2C7F); // Latin Extended-C (29)
204 AddBlock($2C80,$2CFF); // Coptic (114)
205 AddBlock($2D00,$2D2F); // Georgian Supplement (38)
206 AddBlock($2D30,$2D7F); // Tifinagh (55)
207 AddBlock($2D80,$2DDF); // Ethiopic Extended (79)
208 AddBlock($2DE0,$2DFF); // Cyrillic Extended-A (32)
209 AddBlock($2E00,$2E7F); // Supplemental Punctuation (49)
210 AddBlock($2E80,$2EFF); // CJK Radicals Supplement (115)
211 AddBlock($2F00,$2FDF); // Kangxi Radicals (214)
212 AddBlock($2FF0,$2FFF); // Ideographic Description Characters (12)
213 AddBlock($3000,$303F); // CJK Symbols and Punctuation (64)
214 AddBlock($3040,$309F); // Hiragana (93)
215 AddBlock($30A0,$30FF); // Katakana (96)
216 AddBlock($3100,$312F); // Bopomofo (41)
217 AddBlock($3130,$318F); // Hangul Compatibility Jamo (94)
218 AddBlock($3190,$319F); // Kanbun (16)
219 AddBlock($31A0,$31BF); // Bopomofo Extended (24)
220 AddBlock($31C0,$31EF); // CJK Strokes (36)
221 AddBlock($31F0,$31FF); // Katakana Phonetic Extensions (16)
222 AddBlock($3200,$32FF); // Enclosed CJK Letters and Months (242)
223 AddBlock($3300,$33FF); // CJK Compatibility (256)
224 AddBlock($3400,$4DBF); // CJK Unified Ideographs Extension A (2)
225 AddBlock($4DC0,$4DFF); // Yijing Hexagram Symbols (64)
226 AddBlock($4E00,$9FFF); // CJK Unified Ideographs (2)
227 AddBlock($A000,$A48F); // Yi Syllables (1165)
228 AddBlock($A490,$A4CF); // Yi Radicals (55)
229 AddBlock($A500,$A63F); // Vai (300)
230 AddBlock($A640,$A69F); // Cyrillic Extended-B (78)
231 AddBlock($A700,$A71F); // Modifier Tone Letters (32)
232 AddBlock($A720,$A7FF); // Latin Extended-D (114)
233 AddBlock($A800,$A82F); // Syloti Nagri (44)
234 AddBlock($A840,$A87F); // Phags-pa (56)
235 AddBlock($A880,$A8DF); // Saurashtra (81)
236 AddBlock($A900,$A92F); // Kayah Li (48)
237 AddBlock($A930,$A95F); // Rejang (37)
238 AddBlock($AA00,$AA5F); // Cham (83)
239 AddBlock($AC00,$D7AF); // Hangul Syllables (2)
240 AddBlock($D800,$DB7F); // High Surrogates (2)
241 AddBlock($DB80,$DBFF); // High Private Use Surrogates (2)
242 AddBlock($DC00,$DFFF); // Low Surrogates (2)
243 AddBlock($E000,$F8FF); // Private Use Area (2)
244 AddBlock($F900,$FAFF); // CJK Compatibility Ideographs (467)
245 AddBlock($FB00,$FB4F); // Alphabetic Presentation Forms (58)
246 AddBlock($FB50,$FDFF); // Arabic Presentation Forms-A (595)
247 AddBlock($FE00,$FE0F); // Variation Selectors (16)
248 AddBlock($FE10,$FE1F); // Vertical Forms (10)
249 AddBlock($FE20,$FE2F); // Combining Half Marks (7)
250 AddBlock($FE30,$FE4F); // CJK Compatibility Forms (32)
251 AddBlock($FE50,$FE6F); // Small Form Variants (26)
252 AddBlock($FE70,$FEFF); // Arabic Presentation Forms-B (141)
253 AddBlock($FF00,$FFEF); // Halfwidth and Fullwidth Forms (225)
254 AddBlock($FFF0,$FFFF); // Specials (5)
255
256 // next blocks are outside BMP
257 //AddBlock($10000,$1007F); // Linear B Syllabary (88)
258 //AddBlock($10080,$100FF); // Linear B Ideograms (123)
259 //AddBlock($10100,$1013F); // Aegean Numbers (57)
260 //AddBlock($10140,$1018F); // Ancient Greek Numbers (75)
261 //AddBlock($10190,$101CF); // Ancient Symbols (12)
262 //AddBlock($101D0,$101FF); // Phaistos Disc (46)
263 //AddBlock($10280,$1029F); // Lycian (29)
264 //AddBlock($102A0,$102DF); // Carian (49)
265 //AddBlock($10300,$1032F); // Old Italic (35)
266 //AddBlock($10330,$1034F); // Gothic (27)
267 //AddBlock($10380,$1039F); // Ugaritic (31)
268 //AddBlock($103A0,$103DF); // Old Persian (50)
269 //AddBlock($10400,$1044F); // Deseret (80)
270 //AddBlock($10450,$1047F); // Shavian (48)
271 //AddBlock($10480,$104AF); // Osmanya (40)
272 //AddBlock($10800,$1083F); // Cypriot Syllabary (55)
273 //AddBlock($10900,$1091F); // Phoenician (27)
274 //AddBlock($10920,$1093F); // Lydian (27)
275 //AddBlock($10A00,$10A5F); // Kharoshthi (65)
276 //AddBlock($12000,$123FF); // Cuneiform (879)
277 //AddBlock($12400,$1247F); // Cuneiform Numbers and Punctuation (103)
278 //AddBlock($1D000,$1D0FF); // Byzantine Musical Symbols (246)
279 //AddBlock($1D100,$1D1FF); // Musical Symbols (220)
280 //AddBlock($1D200,$1D24F); // Ancient Greek Musical Notation (70)
281 //AddBlock($1D300,$1D35F); // Tai Xuan Jing Symbols (87)
282 //AddBlock($1D360,$1D37F); // Counting Rod Numerals (18)
283 //AddBlock($1D400,$1D7FF); // Mathematical Alphanumeric Symbols (996)
284 //AddBlock($1F000,$1F02F); // Mahjong Tiles (44)
285 //AddBlock($1F030,$1F09F); // Domino Tiles (100)
286 //AddBlock($20000,$2A6DF); // CJK Unified Ideographs Extension B (2)
287 //AddBlock($2F800,$2FA1F); // CJK Compatibility Ideographs Supplement (542)
288 //AddBlock($E0000,$E007F); // Tags (97)
289 //AddBlock($E0100,$E01EF); // Variation Selectors Supplement (240)
290 //AddBlock($F0000,$FFFFF); // Supplementary Private Use Area-A (2)
291 //AddBlock($100000,$10FFFF); // Supplementary Private Use Area-B (2)
292 end;
293
294 constructor TPsUnicode.create;
295 begin
296 inherited create;
297 FBaseFonts := TStringList.Create;
298 FEncodedFonts := TStringList.Create;
299 FUsedFonts := TStringList.Create;
300 FLastFontIndex := -1;
301 FFontSize := 12;
302 end;
303
304 destructor TPsUnicode.destroy;
305 begin
306 FUsedFonts.Free;
307 FEncodedFonts.Free;
308 FBaseFonts.Free;
309 FGlyphs.Free;
310 inherited destroy;
311 end;
312
313 procedure TPsUnicode.OutputString(S: string);
314 var
315 {$IFDEF FPC_HAS_UNICODESTRING}
316 UStr: UnicodeString;
317 {$ELSE}
318 UStr: WideString;
319 {$ENDIF}
320 w: word;
321 i, b: Integer;
322 c: char;
323 SubStr,FontStr: string;
324 FontIndex: Integer;
325
326 procedure EmitSubStr;
327 begin
328 if SubStr<>'' then begin
329 OutLst.Add(FontStr + '('+SubStr+') show');
330 end;
331 SubStr := '';
332 FontStr := '';
333 end;
334
335 begin
336
337 CreateUnicodeBlocks;
338 CreateGlyphMap;
339
340 UStr := UTF8Decode(S);
341 SubStr := '';
342 for i:=1 to Length(UStr) do begin
343
344 w := word(UStr[i]);
345 b := BlockFor(w);
346
347 FontIndex := IndexOfFont(Font, FontSize, FontStyle, b);
348 if (FontIndex<0) or (FontIndex<>FLastFontIndex) then begin
349 EmitSubStr;
350 FontStr := SelectFont(Font, FontSize, FontStyle, b);
351 end;
352
353 c := Char(Byte(w-FBlocks[b].Ini));
354 if c in [#0..#31,'(',')','\'] then
355 SubStr := SubStr + Octal(ord(c))
356 else
357 SubStr := SubStr + c;
358 end;
359
360 EmitSubStr;
361 end;
362
363 procedure TPsUnicode.CountPSChars;
364 var
365 Id: Word;
366 i,j: Integer;
367 begin
368 for i:=0 to Length(FBlocks)-1 do begin
369 FBlocks[i].PSCount:=0;
370 for j:=FBlocks[i].Ini to FBlocks[i].Fin do begin
371 Id := word(j);
372 if FGlyphs.HasId(Id) then
373 Inc(FBlocks[i].PSCount);
374 end;
375 end;
376 end;
377
BlockFornull378 function TPsUnicode.BlockFor(var w: word): integer;
379 var
380 i: Integer;
381 begin
382 CreateUnicodeBlocks;
383 for i:=0 to Length(FBlocks)-1 do begin
384 if FBlocks[i].PSCount=0 then
385 continue;
386 if w<FBlocks[i].Ini then
387 break
388 else
389 if w<=FBlocks[i].Fin then begin
390 result := i;
391 exit;
392 end;
393 end;
394 result := 0;
395 w := 32;
396 end;
397
TPsUnicode.FindEncodingIndexnull398 function TPsUnicode.FindEncodingIndex(ABlock: Integer): Integer;
399 var
400 i: Integer;
401 begin
402 result := -1;
403 for i:=0 to Length(FEncodings)-1 do
404 if FEncodings[i]=ABlock then begin
405 result := i;
406 break;
407 end;
408 end;
409
IndexOfFontnull410 function TPsUnicode.IndexOfFont(AFontName: string; AFontSize, AFontStyle,
411 ABlock: Integer): Integer;
412 var
413 BaseName: string;
414 begin
415 Result := FindEncodingIndex(ABlock);
416 if Result<0 then
417 exit;
418
419 Result := FBaseFonts.IndexOf(AFontName);
420 if Result<0 then
421 exit;
422
423 BaseName := 'F'+IntToStr(Result)+IntToStr(ABlock);
424 Result := FEncodedFonts.IndexOf(BaseName);
425 if Result<0 then
426 exit;
427
428 Result := FUsedFonts.IndexOf(BaseName+IntToStr(AFontSize)+IntToStr(AFontStyle));
429 end;
430
TPsUnicode.SelectFontnull431 function TPsUnicode.SelectFont(AFontName: string; AFontSize, AFontStyle,
432 ABlock: Integer): string;
433 var
434 i,EncIndx: Integer;
435 EncFont,EncScaledFont: string;
436 begin
437
438 EncIndx := FindEncodingIndex(ABlock);
439 if EncIndx<0 then begin
440 // add new encoding array
441 EncIndx := Length(FEncodings);
442 Setlength(FEncodings, EncIndx+1);
443 FEncodings[EncIndx] := ABlock;
444
445 // emit encoding array
446 OutLst.Add(format('/Arr%d %% %.4x-%.4x',[ABlock,FBLocks[ABlock].ini,FBLocks[ABlock].fin]));
447 OutLst.Add('[');
448 ReportBlockEncoding(ABlock);
449 OutLst.Add('] def');
450 OutLst.Add('');;
451 end;
452
453 // NewFontName [NewEncodingArray] /FontName RE
454 i := FBaseFonts.IndexOf(AFontName);
455 if i<0 then
456 i := FBaseFonts.Add(AFontName);
457
458 EncFont :='F'+IntToStr(i)+IntToStr(ABlock);
459 i := FEncodedFonts.IndexOf(EncFont);
460 if i<0 then begin
461 i := FEncodedFonts.Add(EncFont);
462 OutLst.Add('');;
463 OutLst.Add( format('/%s Arr%d /%s RE',[EncFont,ABlock,AFontName]));
464 end;
465
466 // if it's first font use, scale and define it, else just invoke it
467 EncScaledFont := format('%s%d%d',[EncFont,AFontSize,AFontStyle]);
468 i := FUsedFonts.IndexOf(EncScaledFont);
469 if i<0 then begin
470 i := FUsedFonts.Add(EncScaledFont);
471 OutLst.Add(format('/%s { /%s %d selectfont } bind def',
472 [EncScaledFont,EncFont,AFontSize]));
473 OutLst.Add('');
474 end;
475 if FLastFontIndex<>i then begin
476 FLastFontIndex := i;
477 Result := EncScaledFont + ' ';
478 end else
479 Result := '';
480 end;
481
482 procedure TPsUnicode.ReportBlockEncoding(i: Integer);
483 var
484 b,g,j: Integer;
485 Id,GlypIndx: Word;
486 S: string;
487 n: Integer;
488 begin
489 CreateGlyphMap;
490 n := FBlocks[i].Fin-FBlocks[i].Ini+1;
491 g := FBlocks[i].Ini;
492 S := '';
493 for b:=1 to ceil(n/256) do begin
494 for j:=0 to 255 do begin
495 Id := Word(g);
496 if (g<=FBlocks[i].Fin) and FGlyphs.HasID(Id) then begin
497 FGlyphs.GetData(Id, GlypIndx);
498 S := S + '/'+GlyphsArr[GlypIndx].Name+' ';
499 end else
500 S := S + '/uni'+IntToHex(Id,4)+' ';
501 if (j+1) mod 8 = 0 then begin
502 OutLst.Add(S);
503 S := '';
504 end;
505 inc(g);
506 end;
507 break; // TODO: handle only first 256 chars on blocks with too many chars
508 end;
509 if s<>'' then
510 OutLst.Add(S);
511 end;
512
513 procedure TPsUnicode.SetFont(const AValue: string);
514 begin
515 if AValue='' then
516 FFont := 'Times-Roman'
517 else
518 FFont := AValue;
519 end;
520
521 procedure TPsUnicode.SetFontSize(const AValue: Integer);
522 begin
523 if FFontSize>0 then
524 FFontSize := AValue;
525 end;
526
527 procedure TPsUnicode.SetFontStyle(const AValue: Integer);
528 begin
529 if FFontStyle<0 then
530 FFontStyle := 0
531 else
532 FFontStyle := AValue;
533 end;
534
535 procedure TPsUnicode.ResetLastFont;
536 begin
537 FLastFontIndex:=-1;
538 end;
539
UnicodeToGlyphnull540 function TPsUnicode.UnicodeToGlyph(w: word): string;
541 var
542 i: word;
543 begin
544 CreateGlyphMap;
545
546 if FGlyphs.GetData(w, i) then
547 result := GlyphsArr[i].Name
548 else
549 result := '';
550 end;
551
552 end.
553
554