1 /* CharsetEncoder.java --
2    Copyright (C) 2002 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 package java.nio.charset;
39 
40 import java.nio.ByteBuffer;
41 import java.nio.CharBuffer;
42 
/**
 * Base class for engines that turn a sequence of characters into a
 * sequence of bytes for one particular {@link Charset}.
 *
 * <p>Subclasses supply {@link #encodeLoop} and may override the
 * <code>impl*</code> hooks.  This class keeps track of the coding state
 * (reset, coding, end of input seen, flushed), of the actions to take
 * on malformed input and unmappable characters, and of the replacement
 * bytes written when an action is
 * {@link CodingErrorAction#REPLACE}.</p>
 *
 * <p>Instances carry mutable, unsynchronized state.</p>
 *
 * @author Jesse Rosenstock
 * @since 1.4
 */
public abstract class CharsetEncoder
{
  /** No operation in progress; set initially and by {@link #reset}.  */
  private static final int STATE_RESET   = 0;
  /** The last three-argument encode call passed endOfInput == false.  */
  private static final int STATE_CODING  = 1;
  /** The last three-argument encode call passed endOfInput == true.  */
  private static final int STATE_END     = 2;
  /** The current operation has been flushed.  */
  private static final int STATE_FLUSHED = 3;

  /** Replacement used by the three-argument constructor; never exposed.  */
  private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'};

  private final Charset charset;
  private final float averageBytesPerChar;
  private final float maxBytesPerChar;
  /** Private copy of the current replacement bytes.  */
  private byte[] replacement;

  private int state = STATE_RESET;

  private CodingErrorAction malformedInputAction
    = CodingErrorAction.REPORT;
  private CodingErrorAction unmappableCharacterAction
    = CodingErrorAction.REPORT;

  /**
   * Creates an encoder whose replacement is the single byte
   * <code>'?'</code>.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per character
   * @param maxBytesPerChar maximum bytes produced per character
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar)
  {
    this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT);
  }

  /**
   * Creates an encoder with an explicit replacement byte sequence.
   * The replacement is copied, so later changes to the caller's array
   * do not affect this encoder.  Unlike {@link #replaceWith}, the
   * replacement is not checked with {@link #isLegalReplacement} here.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per character
   * @param maxBytesPerChar maximum bytes produced per character
   * @param replacement the initial replacement; not null, not empty
   * @throws IllegalArgumentException if either float is not positive,
   *         or if the replacement is null or empty
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar, byte[] replacement)
  {
    if (averageBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive averageBytesPerChar");
    if (maxBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive maxBytesPerChar");
    // A null or empty replacement would only fail later, in the middle
    // of an encoding operation that uses REPLACE.
    if (replacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (replacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");

    this.charset = cs;
    this.averageBytesPerChar = averageBytesPerChar;
    this.maxBytesPerChar = maxBytesPerChar;
    // Copy: the argument may be the shared DEFAULT_REPLACEMENT array.
    this.replacement = (byte[]) replacement.clone ();
    // NOTE: this hook runs before the subclass constructor body.
    implReplaceWith (this.replacement);
  }

  /**
   * Returns the average-bytes-per-character value given at construction.
   */
  public final float averageBytesPerChar ()
  {
    return averageBytesPerChar;
  }

  /**
   * Tells whether this encoder can encode the given character.
   *
   * @param c the character to test
   * @return true if encoding <code>c</code> reports no error
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (char c)
  {
    CharBuffer cb = CharBuffer.allocate (1).put (c);
    cb.flip ();
    return canEncode (cb);
  }

  /**
   * Tells whether this encoder can encode the given character sequence.
   * A {@link CharBuffer} argument is duplicated first, so its position
   * and limit are left untouched.
   *
   * @param cs the sequence to test
   * @return true if encoding <code>cs</code> reports no error
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (CharSequence cs)
  {
    CharBuffer cb;
    if (cs instanceof CharBuffer)
      cb = ((CharBuffer) cs).duplicate ();
    else
      cb = CharBuffer.wrap (cs);
    return canEncode (cb);
  }

  /**
   * Shared implementation of the public <code>canEncode</code> methods:
   * performs a trial encoding of <code>cb</code> with both error actions
   * temporarily forced to REPORT.
   *
   * @param cb the characters to test; consumed by the trial encoding
   * @return true if the trial encoding succeeded, false if it raised a
   *         {@link CharacterCodingException}
   * @throws IllegalStateException if the state is neither reset nor
   *         flushed
   */
  private boolean canEncode (CharBuffer cb)
  {
    // It is an error if a coding operation is "in progress"
    // I take that to mean the state is not reset or flushed.
    // XXX: check "in progress" everywhere
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    CodingErrorAction oldMalformedInputAction = malformedInputAction;
    CodingErrorAction oldUnmappableCharacterAction
      = unmappableCharacterAction;

    try
      {
        // With REPLACE or IGNORE the trial encoding would always
        // succeed, so force REPORT for its duration.
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (CodingErrorAction.REPORT);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (CodingErrorAction.REPORT);

        // The actual test: without this call every input looked
        // encodable.
        encode (cb);
        return true;
      }
    catch (CharacterCodingException e)
      {
        return false;
      }
    finally
      {
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (oldMalformedInputAction);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (oldUnmappableCharacterAction);
        // A failed trial stops in STATE_END; reset so the encoder is
        // usable again whatever the outcome.
        reset ();
      }
  }

  /**
   * Returns the charset given at construction.
   */
  public final Charset charset ()
  {
    return charset;
  }

  /**
   * Encodes the remaining content of <code>in</code> into a newly
   * allocated buffer as one complete operation (encode with end of
   * input, then flush).  The output buffer is sized from
   * {@link #maxBytesPerChar} and enlarged if that turns out to be too
   * small.
   *
   * @param in the characters to encode
   * @return a buffer, ready for reading, holding the encoded bytes
   * @throws IllegalStateException if an encoding operation is in progress
   * @throws CharacterCodingException if encoding or flushing reports an
   *         error
   */
  public final ByteBuffer encode (CharBuffer in)
    throws CharacterCodingException
  {
    // A completed (flushed) operation does not count as "in progress";
    // start over, as canEncode does.  Any other non-reset state is an
    // operation the caller has not finished.
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    // REVIEW: Using max instead of average may allocate a very large
    // buffer.  Maybe we should do something more efficient?
    int remaining = in.remaining ();
    int n = (int) (remaining * maxBytesPerChar ());
    ByteBuffer out = ByteBuffer.allocate (n);

    if (remaining == 0)
      {
        state = STATE_FLUSHED;
        return out;
      }

    CoderResult cr = encode (in, out, true);
    // OVERFLOW is not an error result; without this loop the output
    // would be silently truncated.
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = encode (in, out, true);
      }
    if (cr.isError ())
      cr.throwException ();

    cr = flush (out);
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = flush (out);
      }
    if (cr.isError ())
      cr.throwException ();

    out.flip ();
    return out;
  }

  /**
   * Returns a larger buffer holding the bytes written to
   * <code>out</code> so far, positioned for further writing.
   */
  private static ByteBuffer grow (ByteBuffer out)
  {
    ByteBuffer bigger = ByteBuffer.allocate (out.capacity () * 2 + 1);
    out.flip ();
    bigger.put (out);
    return bigger;
  }

  /**
   * Encodes as many characters as possible from <code>in</code> into
   * <code>out</code>, applying the configured error actions to results
   * returned by {@link #encodeLoop}.
   *
   * @param in the input characters
   * @param out the output bytes
   * @param endOfInput true if the caller has no input beyond
   *        <code>in</code>
   * @return UNDERFLOW, OVERFLOW, or an error result whose action is
   *         REPORT
   * @throws IllegalStateException if the state is flushed, or if it is
   *         end-of-input and <code>endOfInput</code> is false
   * @throws CoderMalfunctionError if <code>encodeLoop</code> throws a
   *         RuntimeException
   */
  public final CoderResult encode (CharBuffer in, ByteBuffer out,
                                   boolean endOfInput)
  {
    int newState = endOfInput ? STATE_END : STATE_CODING;
    // XXX: Need to check for "previous step was an invocation [not] of
    // this method with a value of true for the endOfInput parameter but
    // a return value indicating an incomplete decoding operation"
    // XXX: We will not check the previous return value, just
    // that the previous call passed true for endOfInput
    if (state != STATE_RESET && state != STATE_CODING
        && !(endOfInput && state == STATE_END))
      throw new IllegalStateException ();
    state = newState;

    for (;;)
      {
        CoderResult cr;
        try
          {
            cr = encodeLoop (in, out);
          }
        catch (RuntimeException e)
          {
            throw new CoderMalfunctionError (e);
          }

        if (cr.isOverflow ())
          return cr;

        if (cr.isUnderflow ())
          {
            // Input left over at end of input can never be completed,
            // so it is treated as malformed.
            if (endOfInput && in.hasRemaining ())
              cr = CoderResult.malformedForLength (in.remaining ());
            else
              return cr;
          }

        CodingErrorAction action = cr.isMalformed ()
                                     ? malformedInputAction
                                     : unmappableCharacterAction;

        if (action == CodingErrorAction.REPORT)
          return cr;

        if (action == CodingErrorAction.REPLACE)
          {
            // The input position is not advanced here, so the caller
            // can retry with more room.
            if (out.remaining () < replacement.length)
              return CoderResult.OVERFLOW;
            out.put (replacement);
          }

        // REPLACE and IGNORE both skip the offending input.
        in.position (in.position () + cr.length ());
      }
  }

  /**
   * Encodes characters from <code>in</code> into <code>out</code>.
   * Called by the three-argument <code>encode</code>, which interprets
   * the returned result.
   *
   * @param in the input characters
   * @param out the output bytes
   * @return the result of the encoding step
   */
  protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out);

  /**
   * Flushes this encoder by delegating to {@link #implFlush}.  The
   * encoder is marked flushed only when the result is not OVERFLOW, so
   * after an overflow the call can be repeated with more room.
   *
   * @param out the output bytes
   * @return the result of <code>implFlush</code>
   * @throws IllegalStateException if the state is neither reset nor
   *         end-of-input
   */
  public final CoderResult flush (ByteBuffer out)
  {
    // It seems weird that you can flush after reset, but Sun's javadoc
    // says an IllegalStateException is thrown "If the previous step of the
    // current decoding operation was an invocation neither of the reset
    // method nor ... of the three-argument encode method with a value of
    // true for the endOfInput parameter."
    // Further note that flush() only requires that there not be
    // an IllegalStateException if the previous step was a call to
    // encode with true as the last argument.  It does not require
    // that the call succeeded.  encode() does require that it succeeded.
    // XXX: test this to see if reality matches javadoc
    if (state != STATE_RESET && state != STATE_END)
      throw new IllegalStateException ();

    CoderResult cr = implFlush (out);
    if (! cr.isOverflow ())
      state = STATE_FLUSHED;
    return cr;
  }

  /**
   * Flush hook.  The default implementation writes nothing and returns
   * {@link CoderResult#UNDERFLOW}.
   *
   * @param out the output bytes
   * @return the result of flushing
   */
  protected CoderResult implFlush (ByteBuffer out)
  {
    return CoderResult.UNDERFLOW;
  }

  /**
   * Hook called by {@link #onMalformedInput} after the action has been
   * stored.
   *
   * @param newAction the new action
   */
  protected void implOnMalformedInput (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by {@link #onUnmappableCharacter} after the action has
   * been stored.
   *
   * @param newAction the new action
   */
  protected void implOnUnmappableCharacter (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by the constructor and by {@link #replaceWith} after
   * the replacement has been stored.
   *
   * @param newReplacement the new replacement bytes
   */
  protected void implReplaceWith (byte[] newReplacement)
  {
    // default implementation does nothing
  }

  /**
   * Hook called by {@link #reset} after the state has been cleared.
   */
  protected void implReset ()
  {
    // default implementation does nothing
  }

  /**
   * Tells whether the given bytes are a legal replacement, by decoding
   * them with a new decoder of this encoder's charset.
   *
   * @param replacement the candidate bytes
   * @return true if decoding the bytes reports no error
   */
  public boolean isLegalReplacement (byte[] replacement)
  {
    // TODO: cache the decoder
    // error actions will be REPORT after construction
    CharsetDecoder decoder = charset.newDecoder ();
    ByteBuffer bb = ByteBuffer.wrap (replacement);
    CharBuffer cb
      = CharBuffer.allocate ((int) (replacement.length
                                    * decoder.maxCharsPerByte ()));
    return !decoder.decode (bb, cb, true).isError ();
  }

  /**
   * Returns the current action for malformed input.
   */
  public CodingErrorAction malformedInputAction ()
  {
    return malformedInputAction;
  }

  /**
   * Returns the maximum-bytes-per-character value given at construction.
   */
  public final float maxBytesPerChar ()
  {
    return maxBytesPerChar;
  }

  /**
   * Sets the action for malformed input.
   *
   * @param newAction the new action; not null
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onMalformedInput (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    malformedInputAction = newAction;
    implOnMalformedInput (newAction);
    return this;
  }

  /**
   * Returns the current action for unmappable characters.
   */
  public CodingErrorAction unmappableCharacterAction ()
  {
    return unmappableCharacterAction;
  }

  /**
   * Sets the action for unmappable characters.
   *
   * @param newAction the new action; not null
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onUnmappableCharacter
    (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    unmappableCharacterAction = newAction;
    implOnUnmappableCharacter (newAction);
    return this;
  }

  /**
   * Returns a copy of the current replacement bytes.  Changing the
   * returned array has no effect on this encoder.
   */
  public final byte[] replacement ()
  {
    return (byte[]) replacement.clone ();
  }

  /**
   * Changes the replacement bytes.  The array is copied.
   *
   * @param newReplacement the new replacement; not null, not empty, no
   *        longer than {@link #maxBytesPerChar}, and legal according to
   *        {@link #isLegalReplacement}
   * @return this encoder
   * @throws IllegalArgumentException if any of those conditions fails
   */
  public final CharsetEncoder replaceWith (byte[] newReplacement)
  {
    if (newReplacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (newReplacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");
    // encode (CharBuffer) sizes its output from maxBytesPerChar, so a
    // replacement must fit within that bound.
    if (newReplacement.length > maxBytesPerChar)
      throw new IllegalArgumentException ("Replacement too long");

    if (!isLegalReplacement (newReplacement))
      throw new IllegalArgumentException ("Illegal replacement");

    this.replacement = (byte[]) newReplacement.clone ();
    implReplaceWith (this.replacement);
    return this;
  }

  /**
   * Clears the coding state and calls {@link #implReset}.
   *
   * @return this encoder
   */
  public final CharsetEncoder reset ()
  {
    state = STATE_RESET;
    implReset ();
    return this;
  }
}
366