1 /* CharsetEncoder.java --
2    Copyright (C) 2002 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 package java.nio.charset;
39 
40 import java.nio.ByteBuffer;
41 import java.nio.CharBuffer;
42 
/**
 * Base class for engines that turn a sequence of characters into a
 * sequence of bytes for one particular {@link Charset}.
 *
 * <p>Subclasses supply the actual conversion in
 * {@link #encodeLoop(CharBuffer, ByteBuffer)}; this class keeps track of
 * the state of the encoding operation and applies the configured
 * {@link CodingErrorAction}s to malformed input and unmappable
 * characters.</p>
 *
 * <p>An encoding operation consists of zero or more calls to
 * {@link #encode(CharBuffer, ByteBuffer, boolean)}, the last one with
 * <code>endOfInput</code> set to <code>true</code>, followed by
 * {@link #flush(ByteBuffer)}.  {@link #reset()} makes the encoder ready
 * for a new operation.  No synchronization is performed by this class.</p>
 *
 * @author Jesse Rosenstock
 * @since 1.4
 */
public abstract class CharsetEncoder
{
  /** No operation has been started since construction or reset(). */
  private static final int STATE_RESET   = 0;
  /** encode() was last called with <code>endOfInput == false</code>. */
  private static final int STATE_CODING  = 1;
  /** encode() was last called with <code>endOfInput == true</code>. */
  private static final int STATE_END     = 2;
  /** The operation has been flushed (or finished trivially). */
  private static final int STATE_FLUSHED = 3;

  /**
   * Replacement used by the three-argument constructor.  This array is
   * never handed out directly; every encoder works on its own copy.
   */
  private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'};

  private final Charset charset;
  private final float averageBytesPerChar;
  private final float maxBytesPerChar;
  private byte[] replacement;

  private int state = STATE_RESET;

  private CodingErrorAction malformedInputAction
    = CodingErrorAction.REPORT;
  private CodingErrorAction unmappableCharacterAction
    = CodingErrorAction.REPORT;

  /**
   * Creates an encoder whose replacement is the single byte
   * <code>'?'</code>.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected number of bytes per input char
   * @param maxBytesPerChar maximum number of bytes per input char
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar)
  {
    this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT);
  }

  /**
   * Creates an encoder with an explicit replacement byte sequence.
   *
   * <p>The replacement is copied, so later changes to the caller's array
   * do not affect this encoder.  Unlike {@link #replaceWith(byte[])},
   * the constructor does not run {@link #isLegalReplacement(byte[])}
   * on the array.</p>
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected number of bytes per input char
   * @param maxBytesPerChar maximum number of bytes per input char
   * @param replacement the initial replacement byte sequence
   * @throws IllegalArgumentException if either float is not positive, or
   *         if <code>replacement</code> is null or empty
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar, byte[] replacement)
  {
    if (averageBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive averageBytesPerChar");
    if (maxBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive maxBytesPerChar");
    if (replacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (replacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");

    this.charset = cs;
    this.averageBytesPerChar
      = averageBytesPerChar;
    this.maxBytesPerChar
      = maxBytesPerChar;
    // Copy so that neither the caller's array nor the shared
    // DEFAULT_REPLACEMENT can be modified through this encoder.
    this.replacement = (byte[]) replacement.clone ();
    implReplaceWith (this.replacement);
  }

  /**
   * Returns the average number of bytes per character given at
   * construction time.
   */
  public final float averageBytesPerChar ()
  {
    return averageBytesPerChar;
  }

  /**
   * Tells whether this encoder can encode the given character.
   *
   * @param c the character to test
   * @return <code>true</code> if encoding <code>c</code> on its own
   *         succeeds with both error actions set to REPORT
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (char c)
  {
    CharBuffer cb = CharBuffer.allocate (1).put (c);
    cb.flip ();
    return canEncode (cb);
  }

  /**
   * Tells whether this encoder can encode the given character sequence.
   * A {@link CharBuffer} argument is duplicated first, so its position
   * and limit are left untouched.
   *
   * @param cs the sequence to test
   * @return <code>true</code> if encoding the whole sequence succeeds
   *         with both error actions set to REPORT
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (CharSequence cs)
  {
    CharBuffer cb;
    if (cs instanceof CharBuffer)
      cb = ((CharBuffer) cs).duplicate ();
    else
      cb = CharBuffer.wrap (cs);
    return canEncode (cb);
  }

  /**
   * Shared implementation of the public canEncode methods: performs a
   * trial encoding of <code>cb</code> with both error actions temporarily
   * forced to REPORT.  The previous actions are restored and the encoder
   * is reset before returning.
   */
  private boolean canEncode (CharBuffer cb)
  {
    // It is an error if a coding operation is "in progress"
    // I take that to mean the state is not reset or flushed.
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    CodingErrorAction oldMalformedInputAction = malformedInputAction;
    CodingErrorAction oldUnmappableCharacterAction
      = unmappableCharacterAction;

    try
      {
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (CodingErrorAction.REPORT);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (CodingErrorAction.REPORT);

        // The trial run: with REPORT in force any character that cannot
        // be encoded surfaces as a CharacterCodingException.
        encode (cb);
      }
    catch (CharacterCodingException e)
      {
        return false;
      }
    finally
      {
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (oldMalformedInputAction);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (oldUnmappableCharacterAction);
        // Leave no trace of the trial run in the encoder state.
        reset ();
      }

    return true;
  }

  /**
   * Returns the charset given at construction time.
   */
  public final Charset charset ()
  {
    return charset;
  }

  /**
   * Convenience method that performs a complete encoding operation:
   * encodes all remaining characters of <code>in</code>, flushes, and
   * returns the result in a newly allocated buffer whose backing array
   * is exactly as long as the encoded output.
   *
   * <p>The encoder must be idle, i.e. freshly reset or flushed; a flushed
   * encoder is reset automatically so this method can be called
   * repeatedly.  If an exception is thrown the encoder is left in the
   * middle of the operation and must be {@link #reset()} before reuse.</p>
   *
   * @param in the characters to encode
   * @return a buffer containing the encoded bytes
   * @throws IllegalStateException if an encoding operation is in progress
   * @throws CharacterCodingException if encoding or flushing reports a
   *         malformed-input or unmappable-character error
   */
  public final ByteBuffer encode (CharBuffer in)
    throws CharacterCodingException
  {
    // Same notion of "in progress" as canEncode(): anything other than
    // reset or flushed.
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    // REVIEW: Using max instead of average may allocate a very large
    // buffer.  Maybe we should do something more efficient?
    int remaining = in.remaining ();
    int n = (int) (remaining * maxBytesPerChar ());
    ByteBuffer out = ByteBuffer.allocate (n);

    if (remaining == 0)
      {
        state = STATE_FLUSHED;
        return out;
      }

    // The initial size is only an estimate (float arithmetic, bytes
    // written by implFlush), so enlarge the buffer on overflow instead
    // of returning truncated output.
    CoderResult cr = encode (in, out, true);
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = encode (in, out, true);
      }
    if (cr.isError ())
      cr.throwException ();

    cr = flush (out);
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = flush (out);
      }
    if (cr.isError ())
      cr.throwException ();

    out.flip ();

    // Unfortunately, resizing the actual bytebuffer array is required.
    byte[] resized = new byte[out.remaining ()];
    out.get (resized);
    return ByteBuffer.wrap (resized);
  }

  /**
   * Returns a larger buffer holding the bytes written to
   * <code>full</code> so far, positioned for further writing.
   */
  private static ByteBuffer grow (ByteBuffer full)
  {
    // "+ 1" guarantees progress even for a zero-capacity buffer.
    ByteBuffer larger = ByteBuffer.allocate (2 * full.capacity () + 1);
    full.flip ();
    larger.put (full);
    return larger;
  }

  /**
   * Encodes as many characters as possible from <code>in</code> into
   * <code>out</code>.
   *
   * <p>{@link #encodeLoop(CharBuffer, ByteBuffer)} is called repeatedly.
   * Overflow is returned to the caller, as is underflow unless
   * <code>endOfInput</code> is set and input remains, in which case the
   * leftover is treated as malformed input.  Errors are handled
   * according to the matching error action: REPORT returns the result,
   * REPLACE writes the replacement bytes, and in both non-REPORT cases
   * the offending input is skipped and encoding continues.</p>
   *
   * @param in the input characters
   * @param out the output buffer
   * @param endOfInput <code>true</code> if no further input will follow
   *        the characters in <code>in</code>
   * @return the result describing why encoding stopped
   * @throws IllegalStateException if the call is not allowed in the
   *         current state (after a flush, or with
   *         <code>endOfInput == false</code> after a call with
   *         <code>endOfInput == true</code>)
   * @throws CoderMalfunctionError if encodeLoop throws a
   *         RuntimeException
   */
  public final CoderResult encode (CharBuffer in, ByteBuffer out,
                                   boolean endOfInput)
  {
    int newState = endOfInput ? STATE_END : STATE_CODING;
    // XXX: We do not check the previous return value, just
    // that the previous call passed true for endOfInput
    if (state != STATE_RESET && state != STATE_CODING
        && !(endOfInput && state == STATE_END))
      throw new IllegalStateException ();
    state = newState;

    for (;;)
      {
        CoderResult cr;
        try
          {
            cr = encodeLoop (in, out);
          }
        catch (RuntimeException e)
          {
            throw new CoderMalfunctionError (e);
          }

        if (cr.isOverflow ())
          return cr;

        if (cr.isUnderflow ())
          {
            // Input left over at the very end cannot be completed any
            // more, so it is malformed.
            if (endOfInput && in.hasRemaining ())
              cr = CoderResult.malformedForLength (in.remaining ());
            else
              return cr;
          }

        CodingErrorAction action = cr.isMalformed ()
                                     ? malformedInputAction
                                     : unmappableCharacterAction;

        if (action == CodingErrorAction.REPORT)
          return cr;

        if (action == CodingErrorAction.REPLACE)
          {
            if (out.remaining () < replacement.length)
              return CoderResult.OVERFLOW;
            out.put (replacement);
          }

        // IGNORE and REPLACE both skip the offending characters.
        in.position (in.position () + cr.length ());
      }
  }

  /**
   * Encodes characters from <code>in</code> into <code>out</code>.
   * Called by {@link #encode(CharBuffer, ByteBuffer, boolean)}, which
   * interprets the returned result and applies the error actions.
   *
   * @param in the input characters
   * @param out the output buffer
   * @return the result describing why the loop stopped
   */
  protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out);

  /**
   * Flushes this encoder by delegating to
   * {@link #implFlush(ByteBuffer)}.
   *
   * <p>If the result is an overflow the encoder stays in its current
   * state, so the call can be repeated with a buffer that has more
   * room; otherwise the encoder becomes flushed.</p>
   *
   * @param out the output buffer
   * @return the result of implFlush
   * @throws IllegalStateException if the previous step was neither a
   *         reset nor a three-argument encode with
   *         <code>endOfInput == true</code>
   */
  public final CoderResult flush (ByteBuffer out)
  {
    // It seems weird that you can flush after reset, but Sun's javadoc
    // says an IllegalStateException is thrown "If the previous step of the
    // current encoding operation was an invocation neither of the reset
    // method nor ... of the three-argument encode method with a value of
    // true for the endOfInput parameter."
    // Note that this only requires the previous encode call to have
    // passed true as the last argument, not that it succeeded.
    // XXX: test this to see if reality matches javadoc
    if (state != STATE_RESET && state != STATE_END)
      throw new IllegalStateException ();

    CoderResult cr = implFlush (out);
    if (!cr.isOverflow ())
      state = STATE_FLUSHED;
    return cr;
  }

  /**
   * Hook called by {@link #flush(ByteBuffer)}.  The default
   * implementation writes nothing and returns
   * {@link CoderResult#UNDERFLOW}.
   *
   * @param out the output buffer
   */
  protected CoderResult implFlush (ByteBuffer out)
  {
    return CoderResult.UNDERFLOW;
  }

  /**
   * Hook called by {@link #onMalformedInput(CodingErrorAction)} after
   * the new action has been stored.
   *
   * @param newAction the new action
   */
  protected void implOnMalformedInput (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by {@link #onUnmappableCharacter(CodingErrorAction)}
   * after the new action has been stored.
   *
   * @param newAction the new action
   */
  protected void implOnUnmappableCharacter (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by the constructor and by {@link #replaceWith(byte[])}
   * after the new replacement has been stored.
   *
   * @param newReplacement the encoder's new replacement array
   */
  protected void implReplaceWith (byte[] newReplacement)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by {@link #reset()} after the state has been cleared.
   */
  protected void implReset ()
  {
    // default implementation does nothing
  }

  /**
   * Tells whether the given bytes are a legal replacement, judged by
   * decoding them with a new decoder of this encoder's charset and
   * checking that no error is reported.
   *
   * @param replacement the byte sequence to test
   * @return <code>true</code> if decoding does not report an error
   */
  public boolean isLegalReplacement (byte[] replacement)
  {
    // TODO: cache the decoder
    // error actions will be REPORT after construction
    CharsetDecoder decoder = charset.newDecoder ();
    ByteBuffer bb = ByteBuffer.wrap (replacement);
    CharBuffer cb
      = CharBuffer.allocate ((int) (replacement.length
                                    * decoder.maxCharsPerByte ()));
    return !decoder.decode (bb, cb, true).isError ();
  }

  /**
   * Returns the current action for malformed input.
   */
  public CodingErrorAction malformedInputAction ()
  {
    return malformedInputAction;
  }

  /**
   * Returns the maximum number of bytes per character given at
   * construction time.
   */
  public final float maxBytesPerChar ()
  {
    return maxBytesPerChar;
  }

  /**
   * Sets the action for malformed input and notifies
   * {@link #implOnMalformedInput(CodingErrorAction)}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onMalformedInput (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    malformedInputAction = newAction;
    implOnMalformedInput (newAction);
    return this;
  }

  /**
   * Returns the current action for unmappable characters.
   */
  public CodingErrorAction unmappableCharacterAction ()
  {
    return unmappableCharacterAction;
  }

  /**
   * Sets the action for unmappable characters and notifies
   * {@link #implOnUnmappableCharacter(CodingErrorAction)}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onUnmappableCharacter
    (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    unmappableCharacterAction = newAction;
    implOnUnmappableCharacter (newAction);
    return this;
  }

  /**
   * Returns a copy of the current replacement byte sequence.  Changing
   * the returned array has no effect on this encoder.
   */
  public final byte[] replacement ()
  {
    return (byte[]) replacement.clone ();
  }

  /**
   * Changes the replacement byte sequence and notifies
   * {@link #implReplaceWith(byte[])}.  The array is copied.
   *
   * @param newReplacement the new replacement
   * @return this encoder
   * @throws IllegalArgumentException if <code>newReplacement</code> is
   *         null, empty, longer than {@link #maxBytesPerChar()}, or
   *         rejected by {@link #isLegalReplacement(byte[])}
   */
  public final CharsetEncoder replaceWith (byte[] newReplacement)
  {
    if (newReplacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (newReplacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");
    if (newReplacement.length > maxBytesPerChar)
      throw new IllegalArgumentException ("Replacement too long");

    if (!isLegalReplacement (newReplacement))
      throw new IllegalArgumentException ("Illegal replacement");

    this.replacement = (byte[]) newReplacement.clone ();
    implReplaceWith (this.replacement);
    return this;
  }

  /**
   * Discards the state of any encoding operation, making this encoder
   * ready for a new one, and notifies {@link #implReset()}.
   *
   * @return this encoder
   */
  public final CharsetEncoder reset ()
  {
    state = STATE_RESET;
    implReset ();
    return this;
  }
}
370