1 /* 2 * $Id: PDFFontEncoding.java,v 1.4 2009-02-12 13:53:54 tomoke Exp $ 3 * 4 * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle, 5 * Santa Clara, California 95054, U.S.A. All rights reserved. 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this library; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 package com.sun.pdfview.font; 22 23 import java.io.IOException; 24 import java.util.ArrayList; 25 import java.util.HashMap; 26 import java.util.List; 27 import java.util.Map; 28 29 import com.sun.pdfview.PDFObject; 30 31 /** 32 * The PDFFont encoding encapsulates the mapping from character codes 33 * in the PDF document to glyphs of the font. 34 * 35 * Encodings take two basic forms. For Type1, TrueType, and Type3 fonts, 36 * the encoding maps from character codes to Strings, which represent the 37 * glyphs of the font. For Type0 fonts, the mapping is a CMap which maps 38 * character codes to characters in one of many descendant fonts. 39 * 40 * Note that the data in the PDF might be ASCII characters (bytes) or it might 41 * be a multi-byte format such as unicode. For now we will assume all 42 * glyph ids fit into at most the two bytes of a character. 43 */ 44 public class PDFFontEncoding { 45 46 /** Encoding types */ 47 private static final int TYPE_ENCODING = 0; 48 private static final int TYPE_CMAP = 1; 49 /** 50 * the base encoding (an array of integers which can be mapped to names 51 * using the methods on FontSupport 52 */ 53 private int[] baseEncoding; 54 /** any differences from the base encoding */ 55 private Map<Character,String> differences; 56 /** 57 * a CMap for fonts encoded by CMap 58 */ 59 private PDFCMap cmap; 60 /** 61 * the type of this encoding (encoding or CMap) 62 */ 63 private int type; 64 65 /** Creates a new instance of PDFFontEncoding */ PDFFontEncoding(String fontType, PDFObject encoding)66 public PDFFontEncoding(String fontType, PDFObject encoding) 67 throws IOException { 68 if (encoding.getType() == PDFObject.NAME) { 69 // if the encoding is a String, it is the name of an encoding 70 // or the name of a CMap, depending on the type of the font 71 if (fontType.equals("Type0")) { 72 type = TYPE_CMAP; 73 cmap = PDFCMap.getCMap(encoding.getStringValue()); 74 } else { 75 type = TYPE_ENCODING; 76 77 differences = new HashMap<Character,String>(); 78 baseEncoding = this.getBaseEncoding(encoding.getStringValue()); 79 } 80 } else { 81 // loook at the "Type" entry of the encoding to determine the type 82 String typeStr = encoding.getDictRef("Type").getStringValue(); 83 84 if (typeStr.equals("Encoding")) { 85 // it is an encoding 86 type = TYPE_ENCODING; 87 parseEncoding(encoding); 88 } else if (typeStr.equals("CMap")) { 89 // it is a CMap 90 type = TYPE_CMAP; 91 cmap = PDFCMap.getCMap(encoding); 92 } else { 93 throw new IllegalArgumentException("Uknown encoding type: " + type); 94 } 95 } 96 } 97 98 /** Get the glyphs associated with a given String */ getGlyphs(PDFFont font, String text)99 public List<PDFGlyph> getGlyphs(PDFFont font, String text) { 100 List<PDFGlyph> outList = new ArrayList<PDFGlyph>(text.length()); 101 102 // go character by character through the text 103 char[] arry = text.toCharArray(); 104 for (int i = 0; i < arry.length; i++) { 105 switch (type) { 106 case TYPE_ENCODING: 107 outList.add(getGlyphFromEncoding(font, arry[i])); 108 break; 109 case TYPE_CMAP: 110 // 2 bytes -> 1 character in a CMap 111 char c = (char) ((arry[i] & 0xff) << 8); 112 if (i < arry.length - 1) { 113 c |= (char) (arry[++i] & 0xff); 114 } 115 outList.add(getGlyphFromCMap(font, c)); 116 break; 117 } 118 } 119 120 return outList; 121 } 122 123 /** 124 * Get a glyph from an encoding, given a font and character 125 */ getGlyphFromEncoding(PDFFont font, char src)126 private PDFGlyph getGlyphFromEncoding(PDFFont font, char src) { 127 String charName = null; 128 129 // only deal with one byte of source 130 src &= 0xff; 131 132 // see if this character is in the differences list 133 if (differences.containsKey(new Character(src))) { 134 charName = (String) differences.get(new Character(src)); 135 } else if (baseEncoding != null) { 136 // get the character name from the base encoding 137 int charID = baseEncoding[src]; 138 charName = FontSupport.getName(charID); 139 } 140 141 return font.getCachedGlyph(src, charName); 142 } 143 144 /** 145 * Get a glyph from a CMap, given a Type0 font and a character 146 */ getGlyphFromCMap(PDFFont font, char src)147 private PDFGlyph getGlyphFromCMap(PDFFont font, char src) { 148 int fontID = cmap.getFontID(src); 149 char charID = cmap.map(src); 150 151 if (font instanceof Type0Font) { 152 font = ((Type0Font) font).getDescendantFont(fontID); 153 } 154 155 return font.getCachedGlyph(charID, null); 156 } 157 158 /** 159 * Parse a PDF encoding object for the actual encoding 160 */ parseEncoding(PDFObject encoding)161 public void parseEncoding(PDFObject encoding) throws IOException { 162 differences = new HashMap<Character,String>(); 163 164 // figure out the base encoding, if one exists 165 PDFObject baseEncObj = encoding.getDictRef("BaseEncoding"); 166 if (baseEncObj != null) { 167 baseEncoding = getBaseEncoding(baseEncObj.getStringValue()); 168 } 169 170 // parse the differences array 171 PDFObject diffArrayObj = encoding.getDictRef("Differences"); 172 if (diffArrayObj != null) { 173 PDFObject[] diffArray = diffArrayObj.getArray(); 174 int curPosition = -1; 175 176 for (int i = 0; i < diffArray.length; i++) { 177 if (diffArray[i].getType() == PDFObject.NUMBER) { 178 curPosition = diffArray[i].getIntValue(); 179 } else if (diffArray[i].getType() == PDFObject.NAME) { 180 Character key = new Character((char) curPosition); 181 differences.put(key, diffArray[i].getStringValue()); 182 curPosition++; 183 } else { 184 throw new IllegalArgumentException("Unexpected type in diff array: " + diffArray[i]); 185 } 186 } 187 } 188 } 189 190 /** Get the base encoding for a given name */ getBaseEncoding(String encodingName)191 private int[] getBaseEncoding(String encodingName) { 192 if (encodingName.equals("MacRomanEncoding")) { 193 return FontSupport.macRomanEncoding; 194 } else if (encodingName.equals("MacExpertEncoding")) { 195 return FontSupport.type1CExpertCharset; 196 } else if (encodingName.equals("WinAnsiEncoding")) { 197 return FontSupport.winAnsiEncoding; 198 } else { 199 throw new IllegalArgumentException("Unknown encoding: " + encodingName); 200 } 201 } 202 } 203