1 /*
2  * Copyright 2015-present Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may
5  * not use this file except in compliance with the License. You may obtain
6  * a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations
14  * under the License.
15  */
16 
17 package com.facebook.watchman.bser;
18 
19 // CHECKSTYLE.OFF: AvoidStarImport
20 import static com.facebook.watchman.bser.BserConstants.*;
21 
22 import com.google.common.base.Preconditions;
23 import com.google.common.io.ByteStreams;
24 
25 import java.io.InputStream;
26 import java.io.IOException;
27 
28 import java.nio.BufferUnderflowException;
29 import java.nio.ByteBuffer;
30 import java.nio.ByteOrder;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CodingErrorAction;
33 import java.nio.charset.StandardCharsets;
34 
35 import java.util.ArrayList;
36 import java.util.Collections;
37 import java.util.LinkedHashMap;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.TreeMap;
41 
42 import javax.annotation.Nullable;
43 
44 import com.facebook.watchman.Deserializer;
45 
46 /**
47  * Decoder for the BSER binary JSON format used by the Watchman service:
48  *
49  * https://facebook.github.io/watchman/docs/bser.html
50  */
51 public class BserDeserializer implements Deserializer {
52   public enum KeyOrdering {
53       UNSORTED,
54       SORTED
55   }
56 
57   /**
58    * Exception thrown when BSER parser unexpectedly reaches the end of
59    * the input stream.
60    */
61   @SuppressWarnings("serial")
62   public static class BserEofException extends IOException {
BserEofException(String message)63     public BserEofException(String message) {
64       super(message);
65     }
66 
BserEofException(String message, Throwable cause)67     public BserEofException(String message, Throwable cause) {
68       super(message, cause);
69     }
70   }
71 
72   private final KeyOrdering keyOrdering;
73   private final CharsetDecoder utf8Decoder;
74 
75   /**
76    * If {@code keyOrdering} is {@code SORTED}, any {@code Map} objects
77    * in the resulting value will have their keys sorted in natural
78    * order. Otherwise, any {@code Map}s will have their keys in the
79    * same order with which they were encoded.
80    */
BserDeserializer(KeyOrdering keyOrdering)81   public BserDeserializer(KeyOrdering keyOrdering) {
82     this.keyOrdering = keyOrdering;
83     this.utf8Decoder = StandardCharsets.UTF_8
84         .newDecoder()
85         .onMalformedInput(CodingErrorAction.REPORT);
86   }
87 
88   // 2 bytes marker, 1 byte int size
89   private static final int INITIAL_SNIFF_LEN = 3;
90 
91   // 2 bytes marker, 1 byte int size, up to 8 bytes int64 value
92   private static final int SNIFF_BUFFER_SIZE = 13;
93 
94   /**
95    * Deserializes the next BSER-encoded value from the stream.
96    *
97    * @return either a {@link String}, {@link Number}, {@link List},
98    * {@link Map}, or {@code null}, depending on the type of the
99    * top-level encoded object.
100    */
101   @Nullable
deserializeBserValue(InputStream inputStream)102   public Object deserializeBserValue(InputStream inputStream) throws IOException {
103     try {
104       return deserializeRecursive(readBserBuffer(inputStream));
105     } catch (BufferUnderflowException e) {
106       throw new BserEofException("Prematurely reached end of BSER buffer", e);
107     }
108   }
109 
110   /**
111    * Same as {@link BserDeserializer#deserializeBserValue(InputStream)}, but respecting the
112    * {@link Deserializer#deserialize(InputStream)} interface.
113    */
114   @SuppressWarnings("unchecked")
deserialize(InputStream inputStream)115   public Map<String, Object> deserialize(InputStream inputStream) throws IOException {
116     return (Map<String, Object>) deserializeBserValue(inputStream);
117   }
118 
readBserBuffer(InputStream inputStream)119   private ByteBuffer readBserBuffer(InputStream inputStream) throws IOException {
120     ByteBuffer sniffBuffer = ByteBuffer.allocate(SNIFF_BUFFER_SIZE).order(ByteOrder.nativeOrder());
121     Preconditions.checkState(sniffBuffer.hasArray());
122 
123     int sniffBytesRead = ByteStreams.read(inputStream, sniffBuffer.array(), 0, INITIAL_SNIFF_LEN);
124     if (sniffBytesRead < INITIAL_SNIFF_LEN) {
125       throw new BserEofException(
126           String.format(
127               "Invalid BSER header (expected %d bytes, got %d bytes)",
128               INITIAL_SNIFF_LEN,
129               sniffBytesRead));
130     }
131 
132     if (sniffBuffer.get() != 0x00 || sniffBuffer.get() != 0x01) {
133       throw new IOException("Invalid BSER header");
134     }
135 
136     byte lengthType = sniffBuffer.get();
137     int lengthBytesRemaining;
138     switch (lengthType) {
139       case BSER_INT8:
140         lengthBytesRemaining = 1;
141         break;
142       case BSER_INT16:
143         lengthBytesRemaining = 2;
144         break;
145       case BSER_INT32:
146         lengthBytesRemaining = 4;
147         break;
148       case BSER_INT64:
149         lengthBytesRemaining = 8;
150         break;
151       default:
152         throw new IOException(
153             String.format("Unrecognized BSER header length type %d", lengthType));
154     }
155     int lengthBytesRead = ByteStreams.read(
156         inputStream,
157         sniffBuffer.array(),
158         sniffBuffer.position(),
159         lengthBytesRemaining);
160     if (lengthBytesRead < lengthBytesRemaining) {
161       throw new BserEofException(
162           String.format(
163               "Invalid BSER header length (expected %d bytes, got %d bytes)",
164               lengthBytesRemaining,
165               lengthBytesRead));
166     }
167     int bytesRemaining = deserializeIntLen(sniffBuffer, lengthType);
168 
169     ByteBuffer bserBuffer = ByteBuffer.allocate(bytesRemaining)
170         .order(ByteOrder.nativeOrder());
171     Preconditions.checkState(bserBuffer.hasArray());
172 
173     int remainingBytesRead = ByteStreams.read(
174         inputStream,
175         bserBuffer.array(),
176         0,
177         bytesRemaining);
178 
179     if (remainingBytesRead < bytesRemaining) {
180       throw new IOException(
181           String.format(
182               "Invalid BSER header (expected %d bytes, got %d bytes)",
183               bytesRemaining,
184               remainingBytesRead));
185     }
186 
187     return bserBuffer;
188   }
189 
deserializeIntLen(ByteBuffer buffer, byte type)190   private int deserializeIntLen(ByteBuffer buffer, byte type) throws IOException {
191     long value = deserializeNumber(buffer, type).longValue();
192     if (value > Integer.MAX_VALUE) {
193       throw new IOException(
194           String.format(
195               "BSER length out of range (%d > %d)",
196               value,
197               Integer.MAX_VALUE));
198     } else if (value < 0) {
199       throw new IOException(
200           String.format(
201               "BSER length out of range (%d < 0)",
202               value));
203     }
204     return (int) value;
205   }
206 
deserializeNumber(ByteBuffer buffer, byte type)207   private Number deserializeNumber(ByteBuffer buffer, byte type) throws IOException {
208     switch (type) {
209       case BSER_INT8:
210         return buffer.get();
211       case BSER_INT16:
212         return buffer.getShort();
213       case BSER_INT32:
214         return buffer.getInt();
215       case BSER_INT64:
216         return buffer.getLong();
217       default:
218         throw new IOException(String.format("Invalid BSER number encoding %d", type));
219     }
220   }
221 
deserializeString(ByteBuffer buffer)222   private String deserializeString(ByteBuffer buffer) throws IOException {
223     byte intType = buffer.get();
224     int len = deserializeIntLen(buffer, intType);
225 
226     // We use a CharsetDecoder here instead of String(byte[], Charset)
227     // because we want it to throw an exception for any non-UTF-8 input.
228     buffer.limit(buffer.position() + len);
229 
230     try {
231       // We'll likely have many duplicates of this string. Java 7 and
232       // up have not-insane behavior of String.intern(), so we'll use
233       // it to deduplicate the String instances.
234       //
235       // See: http://java-performance.info/string-intern-in-java-6-7-8/
236       return utf8Decoder.decode(buffer).toString().intern();
237     } finally {
238       buffer.limit(buffer.capacity());
239     }
240   }
241 
deserializeArray(ByteBuffer buffer)242   private List<Object> deserializeArray(ByteBuffer buffer) throws IOException {
243     byte intType = buffer.get();
244     int numItems = deserializeIntLen(buffer, intType);
245     if (numItems == 0) {
246       return Collections.emptyList();
247     }
248     ArrayList<Object> list = new ArrayList<Object>(numItems);
249     for (int i = 0; i < numItems; i++) {
250       list.add(deserializeRecursive(buffer));
251     }
252     return list;
253   }
254 
deserializeObject(ByteBuffer buffer)255   private Map<String, Object> deserializeObject(ByteBuffer buffer) throws IOException {
256     byte intType = buffer.get();
257     int numItems = deserializeIntLen(buffer, intType);
258     if (numItems == 0) {
259       return Collections.emptyMap();
260     }
261     Map<String, Object> map;
262     if (keyOrdering == KeyOrdering.UNSORTED) {
263       map = new LinkedHashMap<String, Object>(numItems);
264     } else {
265       map = new TreeMap<String, Object>();
266     }
267     for (int i = 0; i < numItems; i++) {
268       byte stringType = buffer.get();
269       if (stringType != BSER_STRING) {
270         throw new IOException(
271             String.format(
272                 "Unrecognized BSER object key type %d, expected string",
273                 stringType));
274       }
275       String key = deserializeString(buffer);
276       Object value = deserializeRecursive(buffer);
277       map.put(key, value);
278     }
279     return map;
280   }
281 
deserializeTemplate(ByteBuffer buffer)282   private List<Map<String, Object>> deserializeTemplate(ByteBuffer buffer) throws IOException {
283     byte arrayType = buffer.get();
284     if (arrayType != BSER_ARRAY) {
285       throw new IOException(String.format("Expected ARRAY to follow TEMPLATE, got %d", arrayType));
286     }
287     List<Object> keys = deserializeArray(buffer);
288     byte numItemsType = buffer.get();
289     int numItems = deserializeIntLen(buffer, numItemsType);
290     ArrayList<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
291     for (int itemIdx = 0; itemIdx < numItems; itemIdx++) {
292       Map<String, Object> obj;
293       if (keyOrdering == KeyOrdering.UNSORTED) {
294         obj = new LinkedHashMap<String, Object>();
295       } else {
296         obj = new TreeMap<String, Object>();
297       }
298       for (int keyIdx = 0; keyIdx < keys.size(); keyIdx++) {
299         byte keyValueType = buffer.get();
300         if (keyValueType != BSER_SKIP) {
301           String key = (String) keys.get(keyIdx);
302           obj.put(key, deserializeRecursiveWithType(buffer, keyValueType));
303         }
304       }
305       result.add(obj);
306     }
307     return result;
308   }
309 
310   @Nullable
deserializeRecursive(ByteBuffer buffer)311   private Object deserializeRecursive(ByteBuffer buffer) throws IOException {
312     byte type = buffer.get();
313     return deserializeRecursiveWithType(buffer, type);
314   }
315 
deserializeRecursiveWithType(ByteBuffer buffer, byte type)316   private Object deserializeRecursiveWithType(ByteBuffer buffer, byte type) throws IOException {
317     switch (type) {
318       case BSER_INT8:
319       case BSER_INT16:
320       case BSER_INT32:
321       case BSER_INT64:
322         return deserializeNumber(buffer, type);
323       case BSER_REAL:
324         return buffer.getDouble();
325       case BSER_TRUE:
326         return true;
327       case BSER_FALSE:
328         return false;
329       case BSER_NULL:
330         return null;
331       case BSER_STRING:
332         return deserializeString(buffer);
333       case BSER_ARRAY:
334         return deserializeArray(buffer);
335       case BSER_OBJECT:
336         return deserializeObject(buffer);
337       case BSER_TEMPLATE:
338         return deserializeTemplate(buffer);
339       default:
340         throw new IOException(String.format("Unrecognized BSER value type %d", type));
341     }
342   }
343 }
344