1 /* 2 * Copyright 2015-present Facebook, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 * not use this file except in compliance with the License. You may obtain 6 * a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations 14 * under the License. 15 */ 16 17 package com.facebook.watchman.bser; 18 19 // CHECKSTYLE.OFF: AvoidStarImport 20 import static com.facebook.watchman.bser.BserConstants.*; 21 22 import com.google.common.base.Preconditions; 23 import com.google.common.io.ByteStreams; 24 25 import java.io.InputStream; 26 import java.io.IOException; 27 28 import java.nio.BufferUnderflowException; 29 import java.nio.ByteBuffer; 30 import java.nio.ByteOrder; 31 import java.nio.charset.CharsetDecoder; 32 import java.nio.charset.CodingErrorAction; 33 import java.nio.charset.StandardCharsets; 34 35 import java.util.ArrayList; 36 import java.util.Collections; 37 import java.util.LinkedHashMap; 38 import java.util.List; 39 import java.util.Map; 40 import java.util.TreeMap; 41 42 import javax.annotation.Nullable; 43 44 import com.facebook.watchman.Deserializer; 45 46 /** 47 * Decoder for the BSER binary JSON format used by the Watchman service: 48 * 49 * https://facebook.github.io/watchman/docs/bser.html 50 */ 51 public class BserDeserializer implements Deserializer { 52 public enum KeyOrdering { 53 UNSORTED, 54 SORTED 55 } 56 57 /** 58 * Exception thrown when BSER parser unexpectedly reaches the end of 59 * the input stream. 60 */ 61 @SuppressWarnings("serial") 62 public static class BserEofException extends IOException { BserEofException(String message)63 public BserEofException(String message) { 64 super(message); 65 } 66 BserEofException(String message, Throwable cause)67 public BserEofException(String message, Throwable cause) { 68 super(message, cause); 69 } 70 } 71 72 private final KeyOrdering keyOrdering; 73 private final CharsetDecoder utf8Decoder; 74 75 /** 76 * If {@code keyOrdering} is {@code SORTED}, any {@code Map} objects 77 * in the resulting value will have their keys sorted in natural 78 * order. Otherwise, any {@code Map}s will have their keys in the 79 * same order with which they were encoded. 80 */ BserDeserializer(KeyOrdering keyOrdering)81 public BserDeserializer(KeyOrdering keyOrdering) { 82 this.keyOrdering = keyOrdering; 83 this.utf8Decoder = StandardCharsets.UTF_8 84 .newDecoder() 85 .onMalformedInput(CodingErrorAction.REPORT); 86 } 87 88 // 2 bytes marker, 1 byte int size 89 private static final int INITIAL_SNIFF_LEN = 3; 90 91 // 2 bytes marker, 1 byte int size, up to 8 bytes int64 value 92 private static final int SNIFF_BUFFER_SIZE = 13; 93 94 /** 95 * Deserializes the next BSER-encoded value from the stream. 96 * 97 * @return either a {@link String}, {@link Number}, {@link List}, 98 * {@link Map}, or {@code null}, depending on the type of the 99 * top-level encoded object. 100 */ 101 @Nullable deserializeBserValue(InputStream inputStream)102 public Object deserializeBserValue(InputStream inputStream) throws IOException { 103 try { 104 return deserializeRecursive(readBserBuffer(inputStream)); 105 } catch (BufferUnderflowException e) { 106 throw new BserEofException("Prematurely reached end of BSER buffer", e); 107 } 108 } 109 110 /** 111 * Same as {@link BserDeserializer#deserializeBserValue(InputStream)}, but respecting the 112 * {@link Deserializer#deserialize(InputStream)} interface. 113 */ 114 @SuppressWarnings("unchecked") deserialize(InputStream inputStream)115 public Map<String, Object> deserialize(InputStream inputStream) throws IOException { 116 return (Map<String, Object>) deserializeBserValue(inputStream); 117 } 118 readBserBuffer(InputStream inputStream)119 private ByteBuffer readBserBuffer(InputStream inputStream) throws IOException { 120 ByteBuffer sniffBuffer = ByteBuffer.allocate(SNIFF_BUFFER_SIZE).order(ByteOrder.nativeOrder()); 121 Preconditions.checkState(sniffBuffer.hasArray()); 122 123 int sniffBytesRead = ByteStreams.read(inputStream, sniffBuffer.array(), 0, INITIAL_SNIFF_LEN); 124 if (sniffBytesRead < INITIAL_SNIFF_LEN) { 125 throw new BserEofException( 126 String.format( 127 "Invalid BSER header (expected %d bytes, got %d bytes)", 128 INITIAL_SNIFF_LEN, 129 sniffBytesRead)); 130 } 131 132 if (sniffBuffer.get() != 0x00 || sniffBuffer.get() != 0x01) { 133 throw new IOException("Invalid BSER header"); 134 } 135 136 byte lengthType = sniffBuffer.get(); 137 int lengthBytesRemaining; 138 switch (lengthType) { 139 case BSER_INT8: 140 lengthBytesRemaining = 1; 141 break; 142 case BSER_INT16: 143 lengthBytesRemaining = 2; 144 break; 145 case BSER_INT32: 146 lengthBytesRemaining = 4; 147 break; 148 case BSER_INT64: 149 lengthBytesRemaining = 8; 150 break; 151 default: 152 throw new IOException( 153 String.format("Unrecognized BSER header length type %d", lengthType)); 154 } 155 int lengthBytesRead = ByteStreams.read( 156 inputStream, 157 sniffBuffer.array(), 158 sniffBuffer.position(), 159 lengthBytesRemaining); 160 if (lengthBytesRead < lengthBytesRemaining) { 161 throw new BserEofException( 162 String.format( 163 "Invalid BSER header length (expected %d bytes, got %d bytes)", 164 lengthBytesRemaining, 165 lengthBytesRead)); 166 } 167 int bytesRemaining = deserializeIntLen(sniffBuffer, lengthType); 168 169 ByteBuffer bserBuffer = ByteBuffer.allocate(bytesRemaining) 170 .order(ByteOrder.nativeOrder()); 171 Preconditions.checkState(bserBuffer.hasArray()); 172 173 int remainingBytesRead = ByteStreams.read( 174 inputStream, 175 bserBuffer.array(), 176 0, 177 bytesRemaining); 178 179 if (remainingBytesRead < bytesRemaining) { 180 throw new IOException( 181 String.format( 182 "Invalid BSER header (expected %d bytes, got %d bytes)", 183 bytesRemaining, 184 remainingBytesRead)); 185 } 186 187 return bserBuffer; 188 } 189 deserializeIntLen(ByteBuffer buffer, byte type)190 private int deserializeIntLen(ByteBuffer buffer, byte type) throws IOException { 191 long value = deserializeNumber(buffer, type).longValue(); 192 if (value > Integer.MAX_VALUE) { 193 throw new IOException( 194 String.format( 195 "BSER length out of range (%d > %d)", 196 value, 197 Integer.MAX_VALUE)); 198 } else if (value < 0) { 199 throw new IOException( 200 String.format( 201 "BSER length out of range (%d < 0)", 202 value)); 203 } 204 return (int) value; 205 } 206 deserializeNumber(ByteBuffer buffer, byte type)207 private Number deserializeNumber(ByteBuffer buffer, byte type) throws IOException { 208 switch (type) { 209 case BSER_INT8: 210 return buffer.get(); 211 case BSER_INT16: 212 return buffer.getShort(); 213 case BSER_INT32: 214 return buffer.getInt(); 215 case BSER_INT64: 216 return buffer.getLong(); 217 default: 218 throw new IOException(String.format("Invalid BSER number encoding %d", type)); 219 } 220 } 221 deserializeString(ByteBuffer buffer)222 private String deserializeString(ByteBuffer buffer) throws IOException { 223 byte intType = buffer.get(); 224 int len = deserializeIntLen(buffer, intType); 225 226 // We use a CharsetDecoder here instead of String(byte[], Charset) 227 // because we want it to throw an exception for any non-UTF-8 input. 228 buffer.limit(buffer.position() + len); 229 230 try { 231 // We'll likely have many duplicates of this string. Java 7 and 232 // up have not-insane behavior of String.intern(), so we'll use 233 // it to deduplicate the String instances. 234 // 235 // See: http://java-performance.info/string-intern-in-java-6-7-8/ 236 return utf8Decoder.decode(buffer).toString().intern(); 237 } finally { 238 buffer.limit(buffer.capacity()); 239 } 240 } 241 deserializeArray(ByteBuffer buffer)242 private List<Object> deserializeArray(ByteBuffer buffer) throws IOException { 243 byte intType = buffer.get(); 244 int numItems = deserializeIntLen(buffer, intType); 245 if (numItems == 0) { 246 return Collections.emptyList(); 247 } 248 ArrayList<Object> list = new ArrayList<Object>(numItems); 249 for (int i = 0; i < numItems; i++) { 250 list.add(deserializeRecursive(buffer)); 251 } 252 return list; 253 } 254 deserializeObject(ByteBuffer buffer)255 private Map<String, Object> deserializeObject(ByteBuffer buffer) throws IOException { 256 byte intType = buffer.get(); 257 int numItems = deserializeIntLen(buffer, intType); 258 if (numItems == 0) { 259 return Collections.emptyMap(); 260 } 261 Map<String, Object> map; 262 if (keyOrdering == KeyOrdering.UNSORTED) { 263 map = new LinkedHashMap<String, Object>(numItems); 264 } else { 265 map = new TreeMap<String, Object>(); 266 } 267 for (int i = 0; i < numItems; i++) { 268 byte stringType = buffer.get(); 269 if (stringType != BSER_STRING) { 270 throw new IOException( 271 String.format( 272 "Unrecognized BSER object key type %d, expected string", 273 stringType)); 274 } 275 String key = deserializeString(buffer); 276 Object value = deserializeRecursive(buffer); 277 map.put(key, value); 278 } 279 return map; 280 } 281 deserializeTemplate(ByteBuffer buffer)282 private List<Map<String, Object>> deserializeTemplate(ByteBuffer buffer) throws IOException { 283 byte arrayType = buffer.get(); 284 if (arrayType != BSER_ARRAY) { 285 throw new IOException(String.format("Expected ARRAY to follow TEMPLATE, got %d", arrayType)); 286 } 287 List<Object> keys = deserializeArray(buffer); 288 byte numItemsType = buffer.get(); 289 int numItems = deserializeIntLen(buffer, numItemsType); 290 ArrayList<Map<String, Object>> result = new ArrayList<Map<String, Object>>(); 291 for (int itemIdx = 0; itemIdx < numItems; itemIdx++) { 292 Map<String, Object> obj; 293 if (keyOrdering == KeyOrdering.UNSORTED) { 294 obj = new LinkedHashMap<String, Object>(); 295 } else { 296 obj = new TreeMap<String, Object>(); 297 } 298 for (int keyIdx = 0; keyIdx < keys.size(); keyIdx++) { 299 byte keyValueType = buffer.get(); 300 if (keyValueType != BSER_SKIP) { 301 String key = (String) keys.get(keyIdx); 302 obj.put(key, deserializeRecursiveWithType(buffer, keyValueType)); 303 } 304 } 305 result.add(obj); 306 } 307 return result; 308 } 309 310 @Nullable deserializeRecursive(ByteBuffer buffer)311 private Object deserializeRecursive(ByteBuffer buffer) throws IOException { 312 byte type = buffer.get(); 313 return deserializeRecursiveWithType(buffer, type); 314 } 315 deserializeRecursiveWithType(ByteBuffer buffer, byte type)316 private Object deserializeRecursiveWithType(ByteBuffer buffer, byte type) throws IOException { 317 switch (type) { 318 case BSER_INT8: 319 case BSER_INT16: 320 case BSER_INT32: 321 case BSER_INT64: 322 return deserializeNumber(buffer, type); 323 case BSER_REAL: 324 return buffer.getDouble(); 325 case BSER_TRUE: 326 return true; 327 case BSER_FALSE: 328 return false; 329 case BSER_NULL: 330 return null; 331 case BSER_STRING: 332 return deserializeString(buffer); 333 case BSER_ARRAY: 334 return deserializeArray(buffer); 335 case BSER_OBJECT: 336 return deserializeObject(buffer); 337 case BSER_TEMPLATE: 338 return deserializeTemplate(buffer); 339 default: 340 throw new IOException(String.format("Unrecognized BSER value type %d", type)); 341 } 342 } 343 } 344