1 /* CharsetEncoder.java -- 2 Copyright (C) 2002 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 package java.nio.charset; 39 40 import java.nio.ByteBuffer; 41 import java.nio.CharBuffer; 42 43 /** 44 * @author Jesse Rosenstock 45 * @since 1.4 46 */ 47 public abstract class CharsetEncoder 48 { 49 private static final int STATE_RESET = 0; 50 private static final int STATE_CODING = 1; 51 private static final int STATE_END = 2; 52 private static final int STATE_FLUSHED = 3; 53 54 private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'}; 55 56 private final Charset charset; 57 private final float averageBytesPerChar; 58 private final float maxBytesPerChar; 59 private byte[] replacement; 60 61 private int state = STATE_RESET; 62 63 private CodingErrorAction malformedInputAction 64 = CodingErrorAction.REPORT; 65 private CodingErrorAction unmappableCharacterAction 66 = CodingErrorAction.REPORT; 67 CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar)68 protected CharsetEncoder (Charset cs, float averageBytesPerChar, 69 float maxBytesPerChar) 70 { 71 this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT); 72 } 73 CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement)74 protected CharsetEncoder (Charset cs, float averageBytesPerChar, 75 float maxBytesPerChar, byte[] replacement) 76 { 77 if (averageBytesPerChar <= 0.0f) 78 throw new IllegalArgumentException ("Non-positive averageBytesPerChar"); 79 if (maxBytesPerChar <= 0.0f) 80 throw new IllegalArgumentException ("Non-positive maxBytesPerChar"); 81 82 this.charset = cs; 83 this.averageBytesPerChar 84 = averageBytesPerChar; 85 this.maxBytesPerChar 86 = maxBytesPerChar; 87 this.replacement = replacement; 88 implReplaceWith (replacement); 89 } 90 averageBytesPerChar()91 public final float averageBytesPerChar () 92 { 93 return averageBytesPerChar; 94 } 95 canEncode(char c)96 public boolean canEncode (char c) 97 { 98 CharBuffer cb = CharBuffer.allocate (1).put (c); 99 cb.flip (); 100 return canEncode (cb); 101 } 102 canEncode(CharSequence cs)103 public boolean canEncode (CharSequence cs) 104 { 105 CharBuffer cb; 106 if (cs instanceof CharBuffer) 107 cb = ((CharBuffer) cs).duplicate (); 108 else 109 cb = CharBuffer.wrap (cs); 110 return canEncode (cb); 111 } 112 canEncode(CharBuffer cb)113 private boolean canEncode (CharBuffer cb) 114 { 115 // It is an error if a coding operation is "in progress" 116 // I take that to mean the state is not reset or flushed. 117 // XXX: check "in progress" everywhere 118 if (state == STATE_FLUSHED) 119 reset (); 120 else if (state != STATE_RESET) 121 throw new IllegalStateException (); 122 123 CodingErrorAction oldMalformedInputAction = malformedInputAction; 124 CodingErrorAction oldUnmappableCharacterAction 125 = unmappableCharacterAction; 126 127 try 128 { 129 if (oldMalformedInputAction != CodingErrorAction.REPORT) 130 onMalformedInput (CodingErrorAction.REPORT); 131 if (oldUnmappableCharacterAction != CodingErrorAction.REPORT) 132 onUnmappableCharacter (CodingErrorAction.REPORT); 133 } 134 catch (Exception e) 135 { 136 return false; 137 } 138 finally 139 { 140 if (oldMalformedInputAction != CodingErrorAction.REPORT) 141 onMalformedInput (oldMalformedInputAction); 142 if (oldUnmappableCharacterAction != CodingErrorAction.REPORT) 143 onUnmappableCharacter (oldUnmappableCharacterAction); 144 } 145 146 return true; 147 } 148 charset()149 public final Charset charset () 150 { 151 return charset; 152 } 153 encode(CharBuffer in)154 public final ByteBuffer encode (CharBuffer in) 155 throws CharacterCodingException 156 { 157 // XXX: Sun's Javadoc seems to contradict itself saying an 158 // IllegalStateException is thrown "if a decoding operation is already 159 // in progress" and also that "it resets this Encoder". 160 // Should we check to see that the state is reset, or should we 161 // call reset()? 162 if (state != STATE_RESET) 163 throw new IllegalStateException (); 164 165 // REVIEW: Using max instead of average may allocate a very large 166 // buffer. Maybe we should do something more efficient? 167 int remaining = in.remaining (); 168 int n = (int) (remaining * maxBytesPerChar ()); 169 ByteBuffer out = ByteBuffer.allocate (n); 170 171 if (remaining == 0) 172 { 173 state = STATE_FLUSHED; 174 return out; 175 } 176 177 CoderResult cr = encode (in, out, true); 178 if (cr.isError ()) 179 cr.throwException (); 180 181 cr = flush (out); 182 if (cr.isError ()) 183 cr.throwException (); 184 185 out.flip (); 186 return out; 187 } 188 encode(CharBuffer in, ByteBuffer out, boolean endOfInput)189 public final CoderResult encode (CharBuffer in, ByteBuffer out, 190 boolean endOfInput) 191 { 192 int newState = endOfInput ? STATE_END : STATE_CODING; 193 // XXX: Need to check for "previous step was an invocation [not] of 194 // this method with a value of true for the endOfInput parameter but 195 // a return value indicating an incomplete decoding operation" 196 // XXX: We will not check the previous return value, just 197 // that the previous call passed true for endOfInput 198 if (state != STATE_RESET && state != STATE_CODING 199 && !(endOfInput && state == STATE_END)) 200 throw new IllegalStateException (); 201 state = newState; 202 203 for (;;) 204 { 205 CoderResult cr; 206 try 207 { 208 cr = encodeLoop (in, out); 209 } 210 catch (RuntimeException e) 211 { 212 throw new CoderMalfunctionError (e); 213 } 214 215 if (cr.isOverflow ()) 216 return cr; 217 218 if (cr.isUnderflow ()) 219 { 220 if (endOfInput && in.hasRemaining ()) 221 cr = CoderResult.malformedForLength (in.remaining ()); 222 else 223 return cr; 224 } 225 226 CodingErrorAction action = cr.isMalformed () 227 ? malformedInputAction 228 : unmappableCharacterAction; 229 230 if (action == CodingErrorAction.REPORT) 231 return cr; 232 233 if (action == CodingErrorAction.REPLACE) 234 { 235 if (out.remaining () < replacement.length) 236 return CoderResult.OVERFLOW; 237 out.put (replacement); 238 } 239 240 in.position (in.position () + cr.length ()); 241 } 242 } 243 encodeLoop(CharBuffer in, ByteBuffer out)244 protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out); 245 flush(ByteBuffer out)246 public final CoderResult flush (ByteBuffer out) 247 { 248 // It seems weird that you can flush after reset, but Sun's javadoc 249 // says an IllegalStateException is thrown "If the previous step of the 250 // current decoding operation was an invocation neither of the reset 251 // method nor ... of the three-argument encode method with a value of 252 // true for the endOfInput parameter." 253 // Further note that flush() only requires that there not be 254 // an IllegalStateException if the previous step was a call to 255 // encode with true as the last argument. It does not require 256 // that the call succeeded. encode() does require that it succeeded. 257 // XXX: test this to see if reality matches javadoc 258 if (state != STATE_RESET && state != STATE_END) 259 throw new IllegalStateException (); 260 261 state = STATE_FLUSHED; 262 return implFlush (out); 263 } 264 implFlush(ByteBuffer out)265 protected CoderResult implFlush (ByteBuffer out) 266 { 267 return CoderResult.UNDERFLOW; 268 } 269 implOnMalformedInput(CodingErrorAction newAction)270 protected void implOnMalformedInput (CodingErrorAction newAction) 271 { 272 // default implementation does nothing 273 } 274 implOnUnmappableCharacter(CodingErrorAction newAction)275 protected void implOnUnmappableCharacter (CodingErrorAction newAction) 276 { 277 // default implementation does nothing 278 } 279 implReplaceWith(byte[] newReplacement)280 protected void implReplaceWith (byte[] newReplacement) 281 { 282 // default implementation does nothing 283 } 284 implReset()285 protected void implReset () 286 { 287 // default implementation does nothing 288 } 289 isLegalReplacement(byte[] replacement)290 public boolean isLegalReplacement (byte[] replacement) 291 { 292 // TODO: cache the decoder 293 // error actions will be REPORT after construction 294 CharsetDecoder decoder = charset.newDecoder (); 295 ByteBuffer bb = ByteBuffer.wrap (replacement); 296 CharBuffer cb 297 = CharBuffer.allocate ((int) (replacement.length 298 * decoder.maxCharsPerByte ())); 299 return !decoder.decode (bb, cb, true).isError (); 300 } 301 malformedInputAction()302 public CodingErrorAction malformedInputAction () 303 { 304 return malformedInputAction; 305 } 306 maxBytesPerChar()307 public final float maxBytesPerChar () 308 { 309 return maxBytesPerChar; 310 } 311 onMalformedInput(CodingErrorAction newAction)312 public final CharsetEncoder onMalformedInput (CodingErrorAction newAction) 313 { 314 if (newAction == null) 315 throw new IllegalArgumentException ("Null action"); 316 317 malformedInputAction = newAction; 318 implOnMalformedInput (newAction); 319 return this; 320 } 321 unmappableCharacterAction()322 public CodingErrorAction unmappableCharacterAction () 323 { 324 return unmappableCharacterAction; 325 } 326 onUnmappableCharacter(CodingErrorAction newAction)327 public final CharsetEncoder onUnmappableCharacter 328 (CodingErrorAction newAction) 329 { 330 if (newAction == null) 331 throw new IllegalArgumentException ("Null action"); 332 333 unmappableCharacterAction = newAction; 334 implOnUnmappableCharacter (newAction); 335 return this; 336 } 337 replacement()338 public final byte[] replacement () 339 { 340 return replacement; 341 } 342 replaceWith(byte[] newReplacement)343 public final CharsetEncoder replaceWith (byte[] newReplacement) 344 { 345 if (newReplacement == null) 346 throw new IllegalArgumentException ("Null replacement"); 347 if (newReplacement.length == 0) 348 throw new IllegalArgumentException ("Empty replacement"); 349 // XXX: what about maxBytesPerChar? 350 351 if (!isLegalReplacement (newReplacement)) 352 throw new IllegalArgumentException ("Illegal replacement"); 353 354 this.replacement = newReplacement; 355 implReplaceWith (newReplacement); 356 return this; 357 } 358 reset()359 public final CharsetEncoder reset () 360 { 361 state = STATE_RESET; 362 implReset (); 363 return this; 364 } 365 } 366