pdfview/font/PDFFontEncoding.java

/*
 * $Id: PDFFontEncoding.java,v 1.4 2009-02-12 13:53:54 tomoke Exp $
 *
 * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
 * Santa Clara, California 95054, U.S.A. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package com.sun.pdfview.font;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.sun.pdfview.PDFObject;

/**
 * The PDFFont encoding encapsulates the mapping from character codes
 * in the PDF document to glyphs of the font.
 *
 * Encodings take two basic forms.  For Type1, TrueType, and Type3 fonts,
 * the encoding maps from character codes to Strings, which represent the
 * glyphs of the font.  For Type0 fonts, the mapping is a CMap which maps
 * character codes to characters in one of many descendant fonts.
 *
 * Note that the data in the PDF might be ASCII characters (bytes) or it might
 * be a multi-byte format such as unicode.  For now we will assume all
 * glyph ids fit into at most the two bytes of a character.
 */
public class PDFFontEncoding {

    /** Encoding types */
    private static final int TYPE_ENCODING = 0;
    private static final int TYPE_CMAP = 1;
    /**
     * the base encoding (an array of integers which can be mapped to names
     * using the methods on FontSupport).  May be null when an encoding
     * dictionary does not name a base encoding.
     */
    private int[] baseEncoding;
    /** any differences from the base encoding, keyed by character code */
    private Map<Character,String> differences;
    /**
     * a CMap for fonts encoded by CMap
     */
    private PDFCMap cmap;
    /**
     * the type of this encoding (encoding or CMap)
     */
    private int type;

    /**
     * Creates a new instance of PDFFontEncoding.
     *
     * @param fontType the font subtype name; "Type0" selects CMap handling
     *        when the encoding object itself does not say which kind it is
     * @param encoding either a name (a predefined encoding or CMap name)
     *        or an object carrying encoding / CMap entries
     * @throws IOException if reading the encoding object fails
     * @throws IllegalArgumentException if the encoding names an unknown
     *         base encoding or declares an unrecognized "Type"
     */
    public PDFFontEncoding(String fontType, PDFObject encoding)
            throws IOException {
        // constant-first comparison so a null font type is simply "not Type0"
        boolean isType0 = "Type0".equals(fontType);

        if (encoding.getType() == PDFObject.NAME) {
            // if the encoding is a String, it is the name of an encoding
            // or the name of a CMap, depending on the type of the font
            if (isType0) {
                type = TYPE_CMAP;
                cmap = PDFCMap.getCMap(encoding.getStringValue());
            } else {
                type = TYPE_ENCODING;

                differences = new HashMap<Character,String>();
                baseEncoding = this.getBaseEncoding(encoding.getStringValue());
            }
        } else {
            // look at the "Type" entry of the encoding to determine the type.
            // When the entry is absent, fall back on the font type in the
            // same way the name branch above does, instead of failing with
            // a NullPointerException.
            PDFObject typeObj = encoding.getDictRef("Type");
            String typeStr;
            if (typeObj != null) {
                typeStr = typeObj.getStringValue();
            } else {
                typeStr = isType0 ? "CMap" : "Encoding";
            }

            if ("Encoding".equals(typeStr)) {
                // it is an encoding
                type = TYPE_ENCODING;
                parseEncoding(encoding);
            } else if ("CMap".equals(typeStr)) {
                // it is a CMap
                type = TYPE_CMAP;
                cmap = PDFCMap.getCMap(encoding);
            } else {
                // report the offending type string, not the numeric field
                throw new IllegalArgumentException("Unknown encoding type: " + typeStr);
            }
        }
    }

    /**
     * Get the glyphs associated with a given String.
     *
     * For a plain encoding every character of the text yields one glyph.
     * For a CMap, characters are consumed in pairs: the low byte of the
     * first becomes the high byte of the code and the low byte of the
     * second becomes the low byte.  A trailing unpaired character is used
     * as the high byte with a zero low byte.
     *
     * @param font the font to fetch glyphs from
     * @param text the raw text, one byte of data per character
     * @return the glyphs, in text order
     */
    public List<PDFGlyph> getGlyphs(PDFFont font, String text) {
        List<PDFGlyph> outList = new ArrayList<PDFGlyph>(text.length());

        // go character by character through the text
        char[] arry = text.toCharArray();
        for (int i = 0; i < arry.length; i++) {
            switch (type) {
                case TYPE_ENCODING:
                    outList.add(getGlyphFromEncoding(font, arry[i]));
                    break;
                case TYPE_CMAP:
                    // 2 bytes -> 1 character in a CMap
                    char c = (char) ((arry[i] & 0xff) << 8);
                    if (i < arry.length - 1) {
                        c |= (char) (arry[++i] & 0xff);
                    }
                    outList.add(getGlyphFromCMap(font, c));
                    break;
            }
        }

        return outList;
    }

    /**
     * Get a glyph from an encoding, given a font and character.
     *
     * The glyph name comes from the differences map when it has an entry
     * for the code, otherwise from the base encoding when one is set and
     * covers the code; otherwise the name passed to the font is null.
     */
    private PDFGlyph getGlyphFromEncoding(PDFFont font, char src) {
        String charName = null;

        // only deal with one byte of source
        src &= 0xff;

        // see if this character is in the differences list
        Character key = Character.valueOf(src);
        if (differences.containsKey(key)) {
            charName = differences.get(key);
        } else if (baseEncoding != null && src < baseEncoding.length) {
            // get the character name from the base encoding; the length
            // check keeps a table shorter than 256 entries from throwing
            // ArrayIndexOutOfBoundsException
            int charID = baseEncoding[src];
            charName = FontSupport.getName(charID);
        }

        return font.getCachedGlyph(src, charName);
    }

    /**
     * Get a glyph from a CMap, given a Type0 font and a character.
     *
     * The CMap supplies both the descendant font id and the mapped
     * character; when the font is not a Type0Font the glyph is fetched
     * from the given font directly.
     */
    private PDFGlyph getGlyphFromCMap(PDFFont font, char src) {
        int fontID = cmap.getFontID(src);
        char charID = cmap.map(src);

        if (font instanceof Type0Font) {
            font = ((Type0Font) font).getDescendantFont(fontID);
        }

        return font.getCachedGlyph(charID, null);
    }

    /**
     * Parse a PDF encoding object for the actual encoding.
     *
     * Replaces the differences map, and sets the base encoding when the
     * object has a "BaseEncoding" entry.  The "Differences" array is read
     * as runs of names, each run preceded by the number giving the code
     * of the first name; following names take consecutive codes.
     *
     * @param encoding the encoding object to read
     * @throws IOException if reading the encoding object fails
     * @throws IllegalArgumentException if the base encoding name is unknown
     *         or the differences array holds something other than numbers
     *         and names
     */
    public void parseEncoding(PDFObject encoding) throws IOException {
        differences = new HashMap<Character,String>();

        // figure out the base encoding, if one exists
        PDFObject baseEncObj = encoding.getDictRef("BaseEncoding");
        if (baseEncObj != null) {
            baseEncoding = getBaseEncoding(baseEncObj.getStringValue());
        }

        // parse the differences array
        PDFObject diffArrayObj = encoding.getDictRef("Differences");
        if (diffArrayObj != null) {
            PDFObject[] diffArray = diffArrayObj.getArray();
            int curPosition = -1;

            for (int i = 0; i < diffArray.length; i++) {
                PDFObject entry = diffArray[i];
                if (entry.getType() == PDFObject.NUMBER) {
                    // a number restarts the run at that character code
                    curPosition = entry.getIntValue();
                } else if (entry.getType() == PDFObject.NAME) {
                    // a name is assigned to the current code, then advance
                    Character key = Character.valueOf((char) curPosition);
                    differences.put(key, entry.getStringValue());
                    curPosition++;
                } else {
                    throw new IllegalArgumentException("Unexpected type in diff array: " + entry);
                }
            }
        }
    }

    /**
     * Get the base encoding for a given name.
     *
     * @param encodingName one of "MacRomanEncoding", "MacExpertEncoding"
     *        or "WinAnsiEncoding"
     * @return the matching table from FontSupport
     * @throws IllegalArgumentException for any other name
     */
    private int[] getBaseEncoding(String encodingName) {
        if (encodingName.equals("MacRomanEncoding")) {
            return FontSupport.macRomanEncoding;
        } else if (encodingName.equals("MacExpertEncoding")) {
            return FontSupport.type1CExpertCharset;
        } else if (encodingName.equals("WinAnsiEncoding")) {
            return FontSupport.winAnsiEncoding;
        } else {
            throw new IllegalArgumentException("Unknown encoding: " + encodingName);
        }
    }
}