/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.afp.fonts;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;

/**
 * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a
 * specified format.
 * <p>
 * The wrapped {@link CharsetEncoder} is stateful and not thread safe, so every access to it is
 * guarded by a lock on the encoder instance.
 */
public abstract class CharactersetEncoder {

    private final CharsetEncoder encoder;

    /**
     * Creates an encoder for the named charset. Characters that cannot be mapped by the charset
     * are replaced with the charset's replacement bytes instead of raising an error.
     *
     * @param encoding the name of the charset to encode into
     */
    private CharactersetEncoder(String encoding) {
        this.encoder = Charset.forName(encoding).newEncoder();
        this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    /**
     * Tells whether or not this encoder can encode the given character.
     *
     * @param c the character
     * @return true if, and only if, this encoder can encode the given character
     * @throws IllegalStateException if the underlying encoder reports that an encoding operation
     *         is already in progress
     */
    final boolean canEncode(char c) {
        // canEncode may change the encoder's state, so it shares the lock used by encode()
        synchronized (encoder) {
            return encoder.canEncode(c);
        }
    }

    /**
     * Encodes a character sequence to a byte array.
     *
     * @param chars the character sequence
     * @return the encoded character sequence
     * @throws CharacterCodingException if the encoding operation fails
     */
    final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
        ByteBuffer bb;
        // encode method is not thread safe
        synchronized (encoder) {
            bb = encoder.encode(CharBuffer.wrap(chars));
        }
        // The backing array can only be handed over directly when the buffer content starts at
        // index 0 of that array; the array may still be longer than limit(), which is why the
        // valid length is passed along explicitly.
        if (bb.hasArray() && bb.arrayOffset() == 0 && bb.position() == 0) {
            return getEncodedChars(bb.array(), bb.limit());
        }
        byte[] bytes = new byte[bb.remaining()];
        bb.get(bytes);
        return getEncodedChars(bytes, bytes.length);
    }

    /**
     * Wraps the raw encoder output in an {@link EncodedChars} container.
     *
     * @param byteArray the array holding the encoded bytes, starting at index 0; it may be longer
     *        than <code>length</code>
     * @param length the number of valid bytes in <code>byteArray</code>
     * @return the container for the encoded bytes
     */
    abstract EncodedChars getEncodedChars(byte[] byteArray, int length);

    /**
     * Encodes <code>chars</code> into a format specified by <code>encoding</code>.
     *
     * @param chars the character sequence
     * @param encoding the encoding type
     * @return encoded data
     * @throws CharacterCodingException if encoding fails
     */
    public static EncodedChars encodeSBCS(CharSequence chars, String encoding)
            throws CharacterCodingException {
        CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding);
        return encoder.encode(chars);
    }

    /**
     * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
     * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
     * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
     * are removed from the sequence of bytes. These are only used in Line Data.
     */
    static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder {

        /** Leading shift byte that is stripped from the encoded data. */
        private static final byte LEADING_SHIFT = 0x0E;

        /** Trailing shift byte that is stripped from the encoded data. */
        private static final byte TRAILING_SHIFT = 0x0F;

        EbcdicDoubleByteLineDataEncoder(String encoding) {
            super(encoding);
        }

        /**
         * Wraps the encoded bytes, dropping the enclosing shift bytes when both are present.
         *
         * @param byteArray the array holding the encoded bytes
         * @param length the number of valid bytes in <code>byteArray</code>
         * @return the encoded characters, flagged as double byte
         */
        @Override
        EncodedChars getEncodedChars(byte[] byteArray, int length) {
            // at least two bytes are needed for a leading and a trailing shift byte; this also
            // keeps empty input from indexing into an empty array
            if (length >= 2
                    && byteArray[0] == LEADING_SHIFT
                    && byteArray[length - 1] == TRAILING_SHIFT) {
                return new EncodedChars(byteArray, 1, length - 2, true);
            }
            return new EncodedChars(byteArray, 0, length, true);
        }
    }

    /**
     * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
     * is the primary format for most Latin character sets. This can also be used for Unicode
     * double-byte character sets (DBCS).
     */
    static final class DefaultEncoder extends CharactersetEncoder {

        private final boolean isDBCS;

        DefaultEncoder(String encoding, boolean isDBCS) {
            super(encoding);
            this.isDBCS = isDBCS;
        }

        /**
         * Wraps the first <code>length</code> bytes of the encoder output.
         *
         * @param byteArray the array holding the encoded bytes
         * @param length the number of valid bytes in <code>byteArray</code>
         * @return the encoded characters
         */
        @Override
        EncodedChars getEncodedChars(byte[] byteArray, int length) {
            // honour length: the array may carry unused capacity beyond the encoded data
            return new EncodedChars(byteArray, 0, length, isDBCS);
        }
    }

    /**
     * A container for encoded character bytes
     */
    // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked
    public static class EncodedChars {

        private final byte[] bytes;
        private final int offset;
        private final int length;
        private final boolean isDBCS;

        /**
         * Wraps a window of <code>bytes</code>.
         *
         * @param bytes the backing array (not copied)
         * @param offset the index of the first encoded byte in <code>bytes</code>
         * @param length the number of encoded bytes
         * @param isDBCS whether the bytes represent double byte characters
         * @throws IllegalArgumentException if the window does not lie within <code>bytes</code>
         */
        private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) {
            // written as a subtraction so that offset + length cannot overflow
            if (offset < 0 || length < 0 || length > bytes.length - offset) {
                throw new IllegalArgumentException();
            }
            this.bytes = bytes;
            this.offset = offset;
            this.length = length;
            this.isDBCS = isDBCS;
        }

        /**
         * Wraps the whole of <code>bytes</code>.
         *
         * @param bytes the backing array (not copied)
         * @param isDBCS whether the bytes represent double byte characters
         */
        private EncodedChars(byte[] bytes, boolean isDBCS) {
            this(bytes, 0, bytes.length, isDBCS);
        }

        /**
         * write <code>length</code> bytes from <code>offset</code> to the output stream
         *
         * @param out output to write the bytes to
         * @param offset the offset, relative to the start of the encoded data, of the first byte
         *        to write
         * @param length the number of bytes to write
         * @throws IOException if an I/O error occurs
         * @throws IllegalArgumentException if the requested range is not within the encoded data
         */
        public void writeTo(OutputStream out, int offset, int length) throws IOException {
            // validate against the logical length, not the backing array, so that bytes outside
            // the encoded window can never be written
            if (offset < 0 || length < 0 || length > this.length - offset) {
                throw new IllegalArgumentException();
            }
            out.write(bytes, this.offset + offset, length);
        }

        /**
         * The number of containing bytes.
         *
         * @return the length
         */
        public int getLength() {
            return length;
        }

        /**
         * Indicates whether or not the EncodedChars object wraps double byte characters.
         *
         * @return true if the wrapped characters are double byte (DBCSs)
         */
        public boolean isDBCS() {
            return isDBCS;
        }

        /**
         * The bytes
         *
         * @return a copy of the encoded bytes; its length equals {@link #getLength()}
         */
        public byte[] getBytes() {
            // return copy just in case, restricted to the encoded window
            byte[] copy = new byte[length];
            System.arraycopy(bytes, offset, copy, 0, length);
            return copy;
        }
    }
}