/*
 * $Id: PDFFontEncoding.java,v 1.4 2009-02-12 13:53:54 tomoke Exp $
 *
 * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
 * Santa Clara, California 95054, U.S.A. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
21 package com.sun.pdfview.font;
22 
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 
29 import com.sun.pdfview.PDFObject;
30 
31 /**
32  * The PDFFont encoding encapsulates the mapping from character codes
33  * in the PDF document to glyphs of the font.
34  *
35  * Encodings take two basic forms.  For Type1, TrueType, and Type3 fonts,
36  * the encoding maps from character codes to Strings, which represent the
37  * glyphs of the font.  For Type0 fonts, the mapping is a CMap which maps
38  * character codes to characters in one of many descendant fonts.
39  *
40  * Note that the data in the PDF might be ASCII characters (bytes) or it might
41  * be a multi-byte format such as unicode.  For now we will assume all
42  * glyph ids fit into at most the two bytes of a character.
43  */
44 public class PDFFontEncoding {
45 
46     /** Encoding types */
47     private static final int TYPE_ENCODING = 0;
48     private static final int TYPE_CMAP = 1;
49     /**
50      * the base encoding (an array of integers which can be mapped to names
51      * using the methods on FontSupport
52      */
53     private int[] baseEncoding;
54     /** any differences from the base encoding */
55     private Map<Character,String> differences;
56     /**
57      * a CMap for fonts encoded by CMap
58      */
59     private PDFCMap cmap;
60     /**
61      * the type of this encoding (encoding or CMap)
62      */
63     private int type;
64 
65     /** Creates a new instance of PDFFontEncoding */
PDFFontEncoding(String fontType, PDFObject encoding)66     public PDFFontEncoding(String fontType, PDFObject encoding)
67             throws IOException {
68         if (encoding.getType() == PDFObject.NAME) {
69             // if the encoding is a String, it is the name of an encoding
70             // or the name of a CMap, depending on the type of the font
71             if (fontType.equals("Type0")) {
72                 type = TYPE_CMAP;
73                 cmap = PDFCMap.getCMap(encoding.getStringValue());
74             } else {
75                 type = TYPE_ENCODING;
76 
77                 differences = new HashMap<Character,String>();
78                 baseEncoding = this.getBaseEncoding(encoding.getStringValue());
79             }
80         } else {
81             // loook at the "Type" entry of the encoding to determine the type
82             String typeStr = encoding.getDictRef("Type").getStringValue();
83 
84             if (typeStr.equals("Encoding")) {
85                 // it is an encoding
86                 type = TYPE_ENCODING;
87                 parseEncoding(encoding);
88             } else if (typeStr.equals("CMap")) {
89                 // it is a CMap
90                 type = TYPE_CMAP;
91                 cmap = PDFCMap.getCMap(encoding);
92             } else {
93                 throw new IllegalArgumentException("Uknown encoding type: " + type);
94             }
95         }
96     }
97 
98     /** Get the glyphs associated with a given String */
getGlyphs(PDFFont font, String text)99     public List<PDFGlyph> getGlyphs(PDFFont font, String text) {
100         List<PDFGlyph> outList = new ArrayList<PDFGlyph>(text.length());
101 
102         // go character by character through the text
103         char[] arry = text.toCharArray();
104         for (int i = 0; i < arry.length; i++) {
105             switch (type) {
106                 case TYPE_ENCODING:
107                     outList.add(getGlyphFromEncoding(font, arry[i]));
108                     break;
109                 case TYPE_CMAP:
110                     // 2 bytes -> 1 character in a CMap
111                     char c = (char) ((arry[i] & 0xff) << 8);
112                     if (i < arry.length - 1) {
113                         c |= (char) (arry[++i] & 0xff);
114                     }
115                     outList.add(getGlyphFromCMap(font, c));
116                     break;
117             }
118         }
119 
120         return outList;
121     }
122 
123     /**
124      * Get a glyph from an encoding, given a font and character
125      */
getGlyphFromEncoding(PDFFont font, char src)126     private PDFGlyph getGlyphFromEncoding(PDFFont font, char src) {
127         String charName = null;
128 
129         // only deal with one byte of source
130         src &= 0xff;
131 
132         // see if this character is in the differences list
133         if (differences.containsKey(new Character(src))) {
134             charName = (String) differences.get(new Character(src));
135         } else if (baseEncoding != null) {
136             // get the character name from the base encoding
137             int charID = baseEncoding[src];
138             charName = FontSupport.getName(charID);
139         }
140 
141         return font.getCachedGlyph(src, charName);
142     }
143 
144     /**
145      * Get a glyph from a CMap, given a Type0 font and a character
146      */
getGlyphFromCMap(PDFFont font, char src)147     private PDFGlyph getGlyphFromCMap(PDFFont font, char src) {
148         int fontID = cmap.getFontID(src);
149         char charID = cmap.map(src);
150 
151         if (font instanceof Type0Font) {
152             font = ((Type0Font) font).getDescendantFont(fontID);
153         }
154 
155         return font.getCachedGlyph(charID, null);
156     }
157 
158     /**
159      * Parse a PDF encoding object for the actual encoding
160      */
parseEncoding(PDFObject encoding)161     public void parseEncoding(PDFObject encoding) throws IOException {
162         differences = new HashMap<Character,String>();
163 
164         // figure out the base encoding, if one exists
165         PDFObject baseEncObj = encoding.getDictRef("BaseEncoding");
166         if (baseEncObj != null) {
167             baseEncoding = getBaseEncoding(baseEncObj.getStringValue());
168         }
169 
170         // parse the differences array
171         PDFObject diffArrayObj = encoding.getDictRef("Differences");
172         if (diffArrayObj != null) {
173             PDFObject[] diffArray = diffArrayObj.getArray();
174             int curPosition = -1;
175 
176             for (int i = 0; i < diffArray.length; i++) {
177                 if (diffArray[i].getType() == PDFObject.NUMBER) {
178                     curPosition = diffArray[i].getIntValue();
179                 } else if (diffArray[i].getType() == PDFObject.NAME) {
180                     Character key = new Character((char) curPosition);
181                     differences.put(key, diffArray[i].getStringValue());
182                     curPosition++;
183                 } else {
184                     throw new IllegalArgumentException("Unexpected type in diff array: " + diffArray[i]);
185                 }
186             }
187         }
188     }
189 
190     /** Get the base encoding for a given name */
getBaseEncoding(String encodingName)191     private int[] getBaseEncoding(String encodingName) {
192         if (encodingName.equals("MacRomanEncoding")) {
193             return FontSupport.macRomanEncoding;
194         } else if (encodingName.equals("MacExpertEncoding")) {
195             return FontSupport.type1CExpertCharset;
196         } else if (encodingName.equals("WinAnsiEncoding")) {
197             return FontSupport.winAnsiEncoding;
198         } else {
199             throw new IllegalArgumentException("Unknown encoding: " + encodingName);
200         }
201     }
202 }
203