/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
17 package org.apache.hadoop.hbase.io.encoding;
18 
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.Cell;
26 import org.apache.hadoop.hbase.KeyValue.KVComparator;
27 import org.apache.hadoop.hbase.io.hfile.HFileContext;
28 
29 /**
30  * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
31  * <ul>
32  * <li>the KeyValues are stored sorted by key</li>
33  * <li>we know the structure of KeyValue</li>
34  * <li>the values are always iterated forward from beginning of block</li>
35  * <li>knowledge of Key Value format</li>
36  * </ul>
37  * It is designed to work fast enough to be feasible as in memory compression.
38  */
39 @InterfaceAudience.Private
40 public interface DataBlockEncoder {
41 
42   /**
43    * Starts encoding for a block of KeyValues. Call
44    * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
45    * encoding of a block.
46    * @param encodingCtx
47    * @param out
48    * @throws IOException
49    */
startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)50   void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
51       throws IOException;
52 
53   /**
54    * Encodes a KeyValue.
55    * @param cell
56    * @param encodingCtx
57    * @param out
58    * @return unencoded kv size written
59    * @throws IOException
60    */
encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)61   int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
62       throws IOException;
63 
64   /**
65    * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
66    * stuff for the encoded block. It must be called at the end of block encoding.
67    * @param encodingCtx
68    * @param out
69    * @param uncompressedBytesWithHeader
70    * @throws IOException
71    */
endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, byte[] uncompressedBytesWithHeader)72   void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
73       byte[] uncompressedBytesWithHeader) throws IOException;
74 
75   /**
76    * Decode.
77    * @param source Compressed stream of KeyValues.
78    * @param decodingCtx
79    * @return Uncompressed block of KeyValues.
80    * @throws IOException If there is an error in source.
81    */
decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)82   ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
83       throws IOException;
84 
85   /**
86    * Return first key in block. Useful for indexing. Typically does not make
87    * a deep copy but returns a buffer wrapping a segment of the actual block's
88    * byte array. This is because the first key in block is usually stored
89    * unencoded.
90    * @param block encoded block we want index, the position will not change
91    * @return First key in block.
92    */
getFirstKeyInBlock(ByteBuffer block)93   ByteBuffer getFirstKeyInBlock(ByteBuffer block);
94 
95   /**
96    * Create a HFileBlock seeker which find KeyValues within a block.
97    * @param comparator what kind of comparison should be used
98    * @param decodingCtx
99    * @return A newly created seeker.
100    */
createSeeker(KVComparator comparator, HFileBlockDecodingContext decodingCtx)101   EncodedSeeker createSeeker(KVComparator comparator,
102       HFileBlockDecodingContext decodingCtx);
103 
104   /**
105    * Creates a encoder specific encoding context
106    *
107    * @param encoding
108    *          encoding strategy used
109    * @param headerBytes
110    *          header bytes to be written, put a dummy header here if the header
111    *          is unknown
112    * @param meta
113    *          HFile meta data
114    * @return a newly created encoding context
115    */
newDataBlockEncodingContext( DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta)116   HFileBlockEncodingContext newDataBlockEncodingContext(
117       DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
118 
119   /**
120    * Creates an encoder specific decoding context, which will prepare the data
121    * before actual decoding
122    *
123    * @param meta
124    *          HFile meta data
125    * @return a newly created decoding context
126    */
newDataBlockDecodingContext(HFileContext meta)127   HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
128 
129   /**
130    * An interface which enable to seek while underlying data is encoded.
131    *
132    * It works on one HFileBlock, but it is reusable. See
133    * {@link #setCurrentBuffer(ByteBuffer)}.
134    */
135   interface EncodedSeeker {
136     /**
137      * Set on which buffer there will be done seeking.
138      * @param buffer Used for seeking.
139      */
setCurrentBuffer(ByteBuffer buffer)140     void setCurrentBuffer(ByteBuffer buffer);
141 
142     /**
143      * Does a deep copy of the key at the current position. A deep copy is
144      * necessary because buffers are reused in the decoder.
145      * @return key at current position
146      */
getKeyDeepCopy()147     ByteBuffer getKeyDeepCopy();
148 
149     /**
150      * Does a shallow copy of the value at the current position. A shallow
151      * copy is possible because the returned buffer refers to the backing array
152      * of the original encoded buffer.
153      * @return value at current position
154      */
getValueShallowCopy()155     ByteBuffer getValueShallowCopy();
156 
157 
158     /**
159      * @return the KeyValue object at the current position. Includes memstore
160      *         timestamp.
161      */
getKeyValue()162     Cell getKeyValue();
163 
164     /** Set position to beginning of given block */
rewind()165     void rewind();
166 
167     /**
168      * Move to next position
169      * @return true on success, false if there is no more positions.
170      */
next()171     boolean next();
172 
173     /**
174      * Moves the seeker position within the current block to:
175      * <ul>
176      * <li>the last key that that is less than or equal to the given key if
177      * <code>seekBefore</code> is false</li>
178      * <li>the last key that is strictly less than the given key if <code>
179      * seekBefore</code> is true. The caller is responsible for loading the
180      * previous block if the requested key turns out to be the first key of the
181      * current block.</li>
182      * </ul>
183      * @param key byte array containing the key
184      * @param offset key position the array
185      * @param length key length in bytes
186      * @param seekBefore find the key strictly less than the given key in case
187      *          of an exact match. Does not matter in case of an inexact match.
188      * @return 0 on exact match, 1 on inexact match.
189      */
190     @Deprecated
seekToKeyInBlock( byte[] key, int offset, int length, boolean seekBefore )191     int seekToKeyInBlock(
192       byte[] key, int offset, int length, boolean seekBefore
193     );
194     /**
195      * Moves the seeker position within the current block to:
196      * <ul>
197      * <li>the last key that that is less than or equal to the given key if
198      * <code>seekBefore</code> is false</li>
199      * <li>the last key that is strictly less than the given key if <code>
200      * seekBefore</code> is true. The caller is responsible for loading the
201      * previous block if the requested key turns out to be the first key of the
202      * current block.</li>
203      * </ul>
204      * @param key - Cell to which the seek should happen
205      * @param seekBefore find the key strictly less than the given key in case
206      *          of an exact match. Does not matter in case of an inexact match.
207      * @return 0 on exact match, 1 on inexact match.
208      */
seekToKeyInBlock(Cell key, boolean seekBefore)209     int seekToKeyInBlock(Cell key, boolean seekBefore);
210 
211     /**
212      * Compare the given key against the current key
213      * @param comparator
214      * @param key
215      * @param offset
216      * @param length
217      * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
218      */
compareKey(KVComparator comparator, byte[] key, int offset, int length)219     public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
220 
compareKey(KVComparator comparator, Cell key)221     public int compareKey(KVComparator comparator, Cell key);
222   }
223 }
224