/* CharsetEncoder.java --
   Copyright (C) 2002 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */

package java.nio.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;

/**
 * Base class for engines that turn characters into bytes for one
 * {@link Charset}.
 *
 * <p>Subclasses supply {@link #encodeLoop}; this class keeps track of the
 * state of the current encoding operation (reset, coding, end of input
 * seen, flushed), holds the configured error actions and replacement
 * bytes, and applies them to the results reported by the subclass.</p>
 *
 * @author Jesse Rosenstock
 * @since 1.4
 */
public abstract class CharsetEncoder
{
  /** No operation in progress; set by construction and by {@link #reset}. */
  private static final int STATE_RESET = 0;
  /** Last step was {@code encode (in, out, false)}. */
  private static final int STATE_CODING = 1;
  /** Last step was {@code encode (in, out, true)}. */
  private static final int STATE_END = 2;
  /** The operation has been flushed. */
  private static final int STATE_FLUSHED = 3;

  /** Replacement used by the three-argument constructor. */
  private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'};

  private final Charset charset;
  private final float averageBytesPerChar;
  private final float maxBytesPerChar;
  private byte[] replacement;

  private int state = STATE_RESET;

  private CodingErrorAction malformedInputAction
    = CodingErrorAction.REPORT;
  private CodingErrorAction unmappableCharacterAction
    = CodingErrorAction.REPORT;

  /**
   * Creates an encoder that uses the single byte {@code '?'} as its
   * replacement.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per input char
   * @param maxBytesPerChar maximum bytes produced per input char
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar)
  {
    this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT);
  }

  /**
   * Creates an encoder with an explicit replacement byte sequence.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per input char
   * @param maxBytesPerChar maximum bytes produced per input char
   * @param replacement the initial replacement; stored as given, it is
   *        not validated here
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar, byte[] replacement)
  {
    if (averageBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive averageBytesPerChar");
    if (maxBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive maxBytesPerChar");

    this.charset = cs;
    this.averageBytesPerChar
      = averageBytesPerChar;
    this.maxBytesPerChar
      = maxBytesPerChar;
    this.replacement = replacement;
    // Note: this hook runs before the subclass constructor body, so an
    // override must not depend on subclass fields being initialised.
    implReplaceWith (replacement);
  }

  /**
   * Returns the average-bytes-per-char value given at construction.
   */
  public final float averageBytesPerChar ()
  {
    return averageBytesPerChar;
  }

  /**
   * Tells whether this encoder can encode the given character.
   *
   * @param c the character to test
   * @return {@code true} if encoding {@code c} on its own reports no error
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (char c)
  {
    CharBuffer cb = CharBuffer.allocate (1).put (c);
    cb.flip ();
    return canEncode (cb);
  }

  /**
   * Tells whether this encoder can encode the given character sequence.
   * A {@link CharBuffer} argument is duplicated first, so its position
   * and limit are left untouched.
   *
   * @param cs the characters to test
   * @return {@code true} if encoding {@code cs} reports no error
   * @throws IllegalStateException if an encoding operation is in progress
   */
  public boolean canEncode (CharSequence cs)
  {
    CharBuffer cb;
    if (cs instanceof CharBuffer)
      cb = ((CharBuffer) cs).duplicate ();
    else
      cb = CharBuffer.wrap (cs);
    return canEncode (cb);
  }

  /**
   * Trial-encodes {@code cb} with both error actions temporarily forced
   * to {@link CodingErrorAction#REPORT}.  The previous actions are
   * restored and the encoder is reset before returning.
   */
  private boolean canEncode (CharBuffer cb)
  {
    // It is an error if a coding operation is "in progress"; that is
    // taken to mean the state is neither reset nor flushed.
    // XXX: check "in progress" everywhere
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    CodingErrorAction oldMalformedInputAction = malformedInputAction;
    CodingErrorAction oldUnmappableCharacterAction
      = unmappableCharacterAction;

    try
      {
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (CodingErrorAction.REPORT);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (CodingErrorAction.REPORT);

        // The actual probe: with REPORT in force any malformed or
        // unmappable input surfaces as a CharacterCodingException.
        encode (cb);
        return true;
      }
    catch (CharacterCodingException e)
      {
        return false;
      }
    finally
      {
        if (oldMalformedInputAction != CodingErrorAction.REPORT)
          onMalformedInput (oldMalformedInputAction);
        if (oldUnmappableCharacterAction != CodingErrorAction.REPORT)
          onUnmappableCharacter (oldUnmappableCharacterAction);
        // Leave no trace of the trial run, whatever state it ended in.
        reset ();
      }
  }

  /**
   * Returns the charset passed to the constructor.
   */
  public final Charset charset ()
  {
    return charset;
  }

  /**
   * Performs a complete encoding operation: resets this encoder, encodes
   * everything remaining in {@code in}, flushes, and returns the bytes
   * in a new buffer whose backing array is exactly as long as the output.
   *
   * @param in the characters to encode
   * @return a new buffer holding the encoded bytes
   * @throws CharacterCodingException if an error result is reported by
   *         encoding or flushing
   */
  public final ByteBuffer encode (CharBuffer in)
    throws CharacterCodingException
  {
    // Sun's Javadoc says both that this method "resets this encoder" and
    // that it throws IllegalStateException if an operation is already in
    // progress.  Resetting is what lets one encoder be reused for
    // several whole-buffer encodes, so that is what is done here.
    reset ();

    // REVIEW: Using max instead of average may allocate a very large
    // buffer.  Maybe we should do something more efficient?
    int remaining = in.remaining ();
    int n = (int) (remaining * maxBytesPerChar ());
    ByteBuffer out = ByteBuffer.allocate (n);

    if (remaining == 0)
      {
        state = STATE_FLUSHED;
        return out;
      }

    for (;;)
      {
        // Once the input is consumed only the flush needs retrying.
        CoderResult cr = in.hasRemaining ()
          ? encode (in, out, true)
          : CoderResult.UNDERFLOW;
        if (cr.isUnderflow ())
          cr = flush (out);
        if (cr.isUnderflow ())
          break;
        if (cr.isOverflow ())
          {
            // OVERFLOW is not an error result; without this the caller
            // would silently receive truncated output.
            out = grow (out);
            continue;
          }
        cr.throwException ();
      }

    out.flip ();

    // Unfortunately, resizing the actual bytebuffer array is required.
    byte[] resized = new byte[out.remaining()];
    out.get(resized);
    return ByteBuffer.wrap(resized);
  }

  /**
   * Returns a larger buffer holding the bytes written to {@code out} so
   * far, positioned for further writing.
   */
  private static ByteBuffer grow (ByteBuffer out)
  {
    // "+ 1" guarantees progress even from a zero-capacity buffer.
    ByteBuffer bigger = ByteBuffer.allocate (2 * out.capacity () + 1);
    out.flip ();
    bigger.put (out);
    return bigger;
  }

  /**
   * Encodes as much of {@code in} into {@code out} as possible, applying
   * the configured error actions to malformed-input and
   * unmappable-character results from {@link #encodeLoop}.
   *
   * @param in the input characters
   * @param out the output bytes
   * @param endOfInput {@code true} if no further input will follow
   * @return underflow, overflow, or (when the relevant action is
   *         {@code REPORT}) the error result
   * @throws IllegalStateException if the previous step does not permit
   *         this call
   * @throws CoderMalfunctionError if {@code encodeLoop} throws a
   *         {@link RuntimeException}
   */
  public final CoderResult encode (CharBuffer in, ByteBuffer out,
                                   boolean endOfInput)
  {
    int newState = endOfInput ? STATE_END : STATE_CODING;
    // XXX: Need to check for "previous step was an invocation [not] of
    // this method with a value of true for the endOfInput parameter but
    // a return value indicating an incomplete decoding operation"
    // XXX: We will not check the previous return value, just
    // that the previous call passed true for endOfInput
    if (state != STATE_RESET && state != STATE_CODING
        && !(endOfInput && state == STATE_END))
      throw new IllegalStateException ();
    state = newState;

    for (;;)
      {
        CoderResult cr;
        try
          {
            cr = encodeLoop (in, out);
          }
        catch (RuntimeException e)
          {
            throw new CoderMalfunctionError (e);
          }

        if (cr.isOverflow ())
          return cr;

        if (cr.isUnderflow ())
          {
            // Input left over at end of input cannot be completed, so
            // it is treated as malformed.
            if (endOfInput && in.hasRemaining ())
              cr = CoderResult.malformedForLength (in.remaining ());
            else
              return cr;
          }

        CodingErrorAction action = cr.isMalformed ()
          ? malformedInputAction
          : unmappableCharacterAction;

        if (action == CodingErrorAction.REPORT)
          return cr;

        if (action == CodingErrorAction.REPLACE)
          {
            if (out.remaining () < replacement.length)
              return CoderResult.OVERFLOW;
            out.put (replacement);
          }

        // IGNORE and REPLACE both skip the offending input.
        in.position (in.position () + cr.length ());
      }
  }

  /**
   * Encodes characters from {@code in} into {@code out}.  Called by
   * {@link #encode(CharBuffer, ByteBuffer, boolean)}, which interprets
   * the result.
   *
   * @param in the input characters
   * @param out the output bytes
   * @return the result of the encoding step
   */
  protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out);

  /**
   * Flushes this encoder by calling {@link #implFlush}.  The operation
   * is marked as flushed unless {@code implFlush} reports overflow, in
   * which case the call may be repeated with more room in {@code out}.
   *
   * @param out the output bytes
   * @return the result of {@code implFlush}
   * @throws IllegalStateException if the previous step was neither
   *         {@link #reset} nor an {@code encode} call with
   *         {@code endOfInput} set
   */
  public final CoderResult flush (ByteBuffer out)
  {
    // It seems weird that you can flush after reset, but Sun's javadoc
    // says an IllegalStateException is thrown "If the previous step of the
    // current decoding operation was an invocation neither of the reset
    // method nor ... of the three-argument encode method with a value of
    // true for the endOfInput parameter."
    // Further note that flush() only requires that there not be
    // an IllegalStateException if the previous step was a call to
    // encode with true as the last argument.  It does not require
    // that the call succeeded.  encode() does require that it succeeded.
    // XXX: test this to see if reality matches javadoc
    if (state != STATE_RESET && state != STATE_END)
      throw new IllegalStateException ();

    CoderResult cr = implFlush (out);
    if (! cr.isOverflow ())
      state = STATE_FLUSHED;
    return cr;
  }

  /**
   * Flush hook for subclasses.  This implementation writes nothing and
   * returns {@link CoderResult#UNDERFLOW}.
   *
   * @param out the output bytes
   * @return the flush result
   */
  protected CoderResult implFlush (ByteBuffer out)
  {
    return CoderResult.UNDERFLOW;
  }

  /**
   * Called by {@link #onMalformedInput} after the new action is stored.
   *
   * @param newAction the new malformed-input action
   */
  protected void implOnMalformedInput (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Called by {@link #onUnmappableCharacter} after the new action is
   * stored.
   *
   * @param newAction the new unmappable-character action
   */
  protected void implOnUnmappableCharacter (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Called by the constructor and by {@link #replaceWith} after the new
   * replacement is stored.
   *
   * @param newReplacement the new replacement bytes
   */
  protected void implReplaceWith (byte[] newReplacement)
  {
    // default implementation does nothing
  }

  /**
   * Called by {@link #reset} after the state is cleared.
   */
  protected void implReset ()
  {
    // default implementation does nothing
  }

  /**
   * Tells whether the given bytes may be used as a replacement, by
   * checking that a new decoder of this charset decodes them without
   * reporting an error.
   *
   * @param replacement the candidate replacement bytes
   * @return {@code true} if decoding reported no error
   */
  public boolean isLegalReplacement (byte[] replacement)
  {
    // TODO: cache the decoder
    // error actions will be REPORT after construction
    CharsetDecoder decoder = charset.newDecoder ();
    ByteBuffer bb = ByteBuffer.wrap (replacement);
    CharBuffer cb
      = CharBuffer.allocate ((int) (replacement.length
                                    * decoder.maxCharsPerByte ()));
    return !decoder.decode (bb, cb, true).isError ();
  }

  /**
   * Returns the current action for malformed input.
   */
  public CodingErrorAction malformedInputAction ()
  {
    return malformedInputAction;
  }

  /**
   * Returns the max-bytes-per-char value given at construction.
   */
  public final float maxBytesPerChar ()
  {
    return maxBytesPerChar;
  }

  /**
   * Sets the action for malformed input and notifies
   * {@link #implOnMalformedInput}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if {@code newAction} is null
   */
  public final CharsetEncoder onMalformedInput (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    malformedInputAction = newAction;
    implOnMalformedInput (newAction);
    return this;
  }

  /**
   * Returns the current action for unmappable characters.
   */
  public CodingErrorAction unmappableCharacterAction ()
  {
    return unmappableCharacterAction;
  }

  /**
   * Sets the action for unmappable characters and notifies
   * {@link #implOnUnmappableCharacter}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if {@code newAction} is null
   */
  public final CharsetEncoder onUnmappableCharacter
    (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    unmappableCharacterAction = newAction;
    implOnUnmappableCharacter (newAction);
    return this;
  }

  /**
   * Returns a copy of the current replacement bytes.  A copy is handed
   * out so that callers cannot alter this encoder's replacement, nor the
   * default array shared between encoders, through the returned array.
   *
   * @return the replacement bytes, or null if none was supplied
   */
  public final byte[] replacement ()
  {
    return replacement == null ? null : (byte[]) replacement.clone ();
  }

  /**
   * Changes the replacement bytes and notifies {@link #implReplaceWith}.
   *
   * @param newReplacement the new replacement
   * @return this encoder
   * @throws IllegalArgumentException if {@code newReplacement} is null,
   *         empty, or rejected by {@link #isLegalReplacement}
   */
  public final CharsetEncoder replaceWith (byte[] newReplacement)
  {
    if (newReplacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (newReplacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");
    // XXX: what about maxBytesPerChar?  The length is not checked
    // against it here.

    if (!isLegalReplacement (newReplacement))
      throw new IllegalArgumentException ("Illegal replacement");

    this.replacement = newReplacement;
    implReplaceWith (newReplacement);
    return this;
  }

  /**
   * Discards the state of any encoding operation and notifies
   * {@link #implReset}.
   *
   * @return this encoder
   */
  public final CharsetEncoder reset ()
  {
    state = STATE_RESET;
    implReset ();
    return this;
  }
}