1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* $Id$ */
19 
20 package org.apache.fop.afp.fonts;
21 
22 import java.io.IOException;
23 import java.io.OutputStream;
24 import java.nio.ByteBuffer;
25 import java.nio.CharBuffer;
26 import java.nio.charset.CharacterCodingException;
27 import java.nio.charset.Charset;
28 import java.nio.charset.CharsetEncoder;
29 import java.nio.charset.CodingErrorAction;
30 
31 /**
32  * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a
33  * specified format.
34  */
35 public abstract class CharactersetEncoder {
36 
37     private final CharsetEncoder encoder;
38 
CharactersetEncoder(String encoding)39     private CharactersetEncoder(String encoding) {
40         this.encoder = Charset.forName(encoding).newEncoder();
41         this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
42     }
43 
44     /**
45      * Tells whether or not this encoder can encode the given character.
46      *
47      * @param c the character
48      * @return true if, and only if, this encoder can encode the given character
49      * @throws IllegalStateException - If an encoding operation is already in progress
50      */
canEncode(char c)51     final boolean canEncode(char c) {
52         return encoder.canEncode(c);
53     }
54 
55     /**
56      * Encodes a character sequence to a byte array.
57      *
58      * @param chars the character sequence
59      * @return the encoded character sequence
60      * @throws CharacterCodingException if the encoding operation fails
61      */
encode(CharSequence chars)62     final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
63         ByteBuffer bb;
64         // encode method is not thread safe
65         synchronized (encoder) {
66             bb = encoder.encode(CharBuffer.wrap(chars));
67         }
68         if (bb.hasArray()) {
69             return getEncodedChars(bb.array(), bb.limit());
70         } else {
71             bb.rewind();
72             byte[] bytes = new byte[bb.remaining()];
73             bb.get(bytes);
74             return getEncodedChars(bytes, bytes.length);
75         }
76     }
77 
getEncodedChars(byte[] byteArray, int length)78     abstract EncodedChars getEncodedChars(byte[] byteArray, int length);
79 
80     /**
81      * Encodes <code>chars</code> into a format specified by <code>encoding</code>.
82      *
83      * @param chars the character sequence
84      * @param encoding the encoding type
85      * @return encoded data
86      * @throws CharacterCodingException if encoding fails
87      */
encodeSBCS(CharSequence chars, String encoding)88     public static EncodedChars encodeSBCS(CharSequence chars, String encoding)
89             throws CharacterCodingException {
90         CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding);
91         return encoder.encode(chars);
92     }
93 
94     /**
95      * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
96      * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
97      * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
98      * are removed from the sequence of bytes. These are only used in Line Data.
99      */
100     static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder {
EbcdicDoubleByteLineDataEncoder(String encoding)101         EbcdicDoubleByteLineDataEncoder(String encoding) {
102             super(encoding);
103         }
104         @Override
getEncodedChars(byte[] byteArray, int length)105         EncodedChars getEncodedChars(byte[] byteArray, int length) {
106             if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) {
107                 return new EncodedChars(byteArray, 1, length - 2, true);
108             }
109             return new EncodedChars(byteArray, true);
110         }
111     }
112 
113     /**
114      * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
115      * the primary format for most Latin character sets. This can also be used for Unicode double-
116      * byte character sets (DBCS).
117      */
118     static final class DefaultEncoder extends CharactersetEncoder {
119         private final boolean isDBCS;
120 
DefaultEncoder(String encoding, boolean isDBCS)121         DefaultEncoder(String encoding, boolean isDBCS) {
122             super(encoding);
123             this.isDBCS = isDBCS;
124         }
125 
126         @Override
getEncodedChars(byte[] byteArray, int length)127         EncodedChars getEncodedChars(byte[] byteArray, int length) {
128             return new EncodedChars(byteArray, isDBCS);
129         }
130     }
131 
132     /**
133      * A container for encoded character bytes
134      */
135     // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked
136     public static class EncodedChars {
137 
138         private final byte[] bytes;
139         private final int offset;
140         private final int length;
141         private final boolean isDBCS;
142 
EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS)143         private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) {
144             if (offset < 0 || length < 0 || offset + length > bytes.length) {
145                 throw new IllegalArgumentException();
146             }
147             this.bytes = bytes;
148             this.offset = offset;
149             this.length = length;
150             this.isDBCS = isDBCS;
151         }
152 
EncodedChars(byte[] bytes, boolean isDBCS)153         private EncodedChars(byte[] bytes, boolean isDBCS) {
154             this(bytes, 0, bytes.length, isDBCS);
155         }
156 
157         /**
158          * write <code>length</code> bytes from <code>offset</code> to the output stream
159          *
160          * @param out output to write the bytes to
161          * @param offset the offset where to write
162          * @param length the length to write
163          * @throws IOException if an I/O error occurs
164          */
writeTo(OutputStream out, int offset, int length)165         public void writeTo(OutputStream out, int offset, int length) throws IOException {
166             if (offset < 0 || length < 0 || offset + length > bytes.length) {
167                 throw new IllegalArgumentException();
168             }
169             out.write(bytes, this.offset + offset, length);
170         }
171 
172         /**
173          * The number of containing bytes.
174          *
175          * @return the length
176          */
getLength()177         public int getLength() {
178             return length;
179         }
180 
181         /**
182          * Indicates whether or not the EncodedChars object wraps double byte characters.
183          *
184          * @return true if the wrapped characters are double byte (DBCSs)
185          */
isDBCS()186         public boolean isDBCS() {
187             return isDBCS;
188         }
189 
190         /**
191          * The bytes
192          *
193          * @return the bytes
194          */
getBytes()195         public byte[] getBytes() {
196             // return copy just in case
197             byte[] copy = new byte[bytes.length];
198             System.arraycopy(bytes, 0, copy, 0, bytes.length);
199             return copy;
200         }
201     }
202 }
203