1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations 15 * under the License. 16 */ 17 package org.apache.hadoop.hbase.io.encoding; 18 19 import java.io.IOException; 20 import java.io.OutputStream; 21 import java.util.HashMap; 22 import java.util.Map; 23 24 import org.apache.hadoop.hbase.classification.InterfaceAudience; 25 import org.apache.hadoop.hbase.classification.InterfaceStability; 26 import org.apache.hadoop.hbase.util.Bytes; 27 28 /** 29 * Provide access to all data block encoding algorithms. All of the algorithms 30 * are required to have unique id which should <b>NEVER</b> be changed. If you 31 * want to add a new algorithm/version, assign it a new id. Announce the new id 32 * in the HBase mailing list to prevent collisions. 33 */ 34 @InterfaceAudience.Public 35 @InterfaceStability.Evolving 36 public enum DataBlockEncoding { 37 38 /** Disable data block encoding. */ 39 NONE(0, null), 40 // id 1 is reserved for the BITSET algorithm to be added later 41 PREFIX(2, "org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder"), 42 DIFF(3, "org.apache.hadoop.hbase.io.encoding.DiffKeyDeltaEncoder"), 43 FAST_DIFF(4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"), 44 // id 5 is reserved for the COPY_KEY algorithm for benchmarking 45 // COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"), 46 PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"); 47 48 private final short id; 49 private final byte[] idInBytes; 50 private DataBlockEncoder encoder; 51 private final String encoderCls; 52 53 public static final int ID_SIZE = Bytes.SIZEOF_SHORT; 54 55 /** Maps data block encoding ids to enum instances. */ 56 private static Map<Short, DataBlockEncoding> idToEncoding = 57 new HashMap<Short, DataBlockEncoding>(); 58 59 static { 60 for (DataBlockEncoding algo : values()) { 61 if (idToEncoding.containsKey(algo.id)) { 62 throw new RuntimeException(String.format( 63 "Two data block encoder algorithms '%s' and '%s' have " + 64 "the same id %d", 65 idToEncoding.get(algo.id).toString(), algo.toString(), 66 (int) algo.id)); 67 } idToEncoding.put(algo.id, algo)68 idToEncoding.put(algo.id, algo); 69 } 70 } 71 DataBlockEncoding(int id, String encoderClsName)72 private DataBlockEncoding(int id, String encoderClsName) { 73 if (id < Short.MIN_VALUE || id > Short.MAX_VALUE) { 74 throw new AssertionError( 75 "Data block encoding algorithm id is out of range: " + id); 76 } 77 this.id = (short) id; 78 this.idInBytes = Bytes.toBytes(this.id); 79 if (idInBytes.length != ID_SIZE) { 80 // White this may seem redundant, if we accidentally serialize 81 // the id as e.g. an int instead of a short, all encoders will break. 82 throw new RuntimeException("Unexpected length of encoder ID byte " + 83 "representation: " + Bytes.toStringBinary(idInBytes)); 84 } 85 this.encoderCls = encoderClsName; 86 } 87 88 /** 89 * @return name converted to bytes. 90 */ getNameInBytes()91 public byte[] getNameInBytes() { 92 return Bytes.toBytes(toString()); 93 } 94 95 /** 96 * @return The id of a data block encoder. 97 */ getId()98 public short getId() { 99 return id; 100 } 101 102 /** 103 * Writes id in bytes. 104 * @param stream where the id should be written. 105 */ writeIdInBytes(OutputStream stream)106 public void writeIdInBytes(OutputStream stream) throws IOException { 107 stream.write(idInBytes); 108 } 109 110 111 /** 112 * Writes id bytes to the given array starting from offset. 113 * 114 * @param dest output array 115 * @param offset starting offset of the output array 116 * @throws IOException 117 */ writeIdInBytes(byte[] dest, int offset)118 public void writeIdInBytes(byte[] dest, int offset) throws IOException { 119 System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE); 120 } 121 122 /** 123 * Return new data block encoder for given algorithm type. 124 * @return data block encoder if algorithm is specified, null if none is 125 * selected. 126 */ getEncoder()127 public DataBlockEncoder getEncoder() { 128 if (encoder == null && id != 0) { 129 // lazily create the encoder 130 encoder = createEncoder(encoderCls); 131 } 132 return encoder; 133 } 134 135 /** 136 * Find and create data block encoder for given id; 137 * @param encoderId id of data block encoder. 138 * @return Newly created data block encoder. 139 */ getDataBlockEncoderById(short encoderId)140 public static DataBlockEncoder getDataBlockEncoderById(short encoderId) { 141 if (!idToEncoding.containsKey(encoderId)) { 142 throw new IllegalArgumentException(String.format( 143 "There is no data block encoder for given id '%d'", 144 (int) encoderId)); 145 } 146 147 return idToEncoding.get(encoderId).getEncoder(); 148 } 149 150 /** 151 * Find and return the name of data block encoder for the given id. 152 * @param encoderId id of data block encoder 153 * @return name, same as used in options in column family 154 */ getNameFromId(short encoderId)155 public static String getNameFromId(short encoderId) { 156 return idToEncoding.get(encoderId).toString(); 157 } 158 159 /** 160 * Check if given encoder has this id. 161 * @param encoder encoder which id will be checked 162 * @param encoderId id which we except 163 * @return true if id is right for given encoder, false otherwise 164 * @exception IllegalArgumentException 165 * thrown when there is no matching data block encoder 166 */ isCorrectEncoder(DataBlockEncoder encoder, short encoderId)167 public static boolean isCorrectEncoder(DataBlockEncoder encoder, 168 short encoderId) { 169 if (!idToEncoding.containsKey(encoderId)) { 170 throw new IllegalArgumentException(String.format( 171 "There is no data block encoder for given id '%d'", 172 (int) encoderId)); 173 } 174 175 DataBlockEncoding algorithm = idToEncoding.get(encoderId); 176 String encoderCls = encoder.getClass().getName(); 177 return encoderCls.equals(algorithm.encoderCls); 178 } 179 getEncodingById(short dataBlockEncodingId)180 public static DataBlockEncoding getEncodingById(short dataBlockEncodingId) { 181 return idToEncoding.get(dataBlockEncodingId); 182 } 183 createEncoder(String fullyQualifiedClassName)184 protected static DataBlockEncoder createEncoder(String fullyQualifiedClassName){ 185 try { 186 return (DataBlockEncoder)Class.forName(fullyQualifiedClassName).newInstance(); 187 } catch (InstantiationException e) { 188 throw new RuntimeException(e); 189 } catch (IllegalAccessException e) { 190 throw new RuntimeException(e); 191 } catch (ClassNotFoundException e) { 192 throw new IllegalArgumentException(e); 193 } 194 } 195 196 } 197