1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with this 4 * work for additional information regarding copyright ownership. The ASF 5 * licenses this file to you under the Apache License, Version 2.0 (the 6 * "License"); you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 * License for the specific language governing permissions and limitations under 15 * the License. 16 */ 17 18 package org.apache.hadoop.io.file.tfile; 19 20 import java.io.DataInput; 21 import java.io.DataOutput; 22 import java.io.IOException; 23 import java.util.Comparator; 24 import java.util.List; 25 26 import org.apache.hadoop.io.Text; 27 28 /** 29 * Supporting Utility classes used by TFile, and shared by users of TFile. 30 */ 31 public final class Utils { 32 33 /** 34 * Prevent the instantiation of Utils. 35 */ Utils()36 private Utils() { 37 // nothing 38 } 39 40 /** 41 * Encoding an integer into a variable-length encoding format. Synonymous to 42 * <code>Utils#writeVLong(out, n)</code>. 43 * 44 * @param out 45 * output stream 46 * @param n 47 * The integer to be encoded 48 * @throws IOException 49 * @see Utils#writeVLong(DataOutput, long) 50 */ writeVInt(DataOutput out, int n)51 public static void writeVInt(DataOutput out, int n) throws IOException { 52 writeVLong(out, n); 53 } 54 55 /** 56 * Encoding a Long integer into a variable-length encoding format. 57 * <ul> 58 * <li>if n in [-32, 127): encode in one byte with the actual value. 59 * Otherwise, 60 * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52; 61 * byte[1]=n&0xff. Otherwise, 62 * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 - 63 * 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise, 64 * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112; 65 * byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise: 66 * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] = 67 * (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff; 68 * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] = 69 * (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff; 70 * byte[4]=(n>>8)&0xff; byte[5]=n&0xff 71 * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] = 72 * (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff; 73 * byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff; 74 * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] = 75 * (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff; 76 * byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff; 77 * byte[7]=n&0xff; 78 * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] = 79 * (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff; 80 * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; 81 * byte[7]=(n>>8)&0xff; byte[8]=n&0xff; 82 * </ul> 83 * 84 * @param out 85 * output stream 86 * @param n 87 * the integer number 88 * @throws IOException 89 */ 90 @SuppressWarnings("fallthrough") writeVLong(DataOutput out, long n)91 public static void writeVLong(DataOutput out, long n) throws IOException { 92 if ((n < 128) && (n >= -32)) { 93 out.writeByte((int) n); 94 return; 95 } 96 97 long un = (n < 0) ? ~n : n; 98 // how many bytes do we need to represent the number with sign bit? 99 int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1; 100 int firstByte = (int) (n >> ((len - 1) * 8)); 101 switch (len) { 102 case 1: 103 // fall it through to firstByte==-1, len=2. 104 firstByte >>= 8; 105 case 2: 106 if ((firstByte < 20) && (firstByte >= -20)) { 107 out.writeByte(firstByte - 52); 108 out.writeByte((int) n); 109 return; 110 } 111 // fall it through to firstByte==0/-1, len=3. 112 firstByte >>= 8; 113 case 3: 114 if ((firstByte < 16) && (firstByte >= -16)) { 115 out.writeByte(firstByte - 88); 116 out.writeShort((int) n); 117 return; 118 } 119 // fall it through to firstByte==0/-1, len=4. 120 firstByte >>= 8; 121 case 4: 122 if ((firstByte < 8) && (firstByte >= -8)) { 123 out.writeByte(firstByte - 112); 124 out.writeShort(((int) n) >>> 8); 125 out.writeByte((int) n); 126 return; 127 } 128 out.writeByte(len - 129); 129 out.writeInt((int) n); 130 return; 131 case 5: 132 out.writeByte(len - 129); 133 out.writeInt((int) (n >>> 8)); 134 out.writeByte((int) n); 135 return; 136 case 6: 137 out.writeByte(len - 129); 138 out.writeInt((int) (n >>> 16)); 139 out.writeShort((int) n); 140 return; 141 case 7: 142 out.writeByte(len - 129); 143 out.writeInt((int) (n >>> 24)); 144 out.writeShort((int) (n >>> 8)); 145 out.writeByte((int) n); 146 return; 147 case 8: 148 out.writeByte(len - 129); 149 out.writeLong(n); 150 return; 151 default: 152 throw new RuntimeException("Internel error"); 153 } 154 } 155 156 /** 157 * Decoding the variable-length integer. Synonymous to 158 * <code>(int)Utils#readVLong(in)</code>. 159 * 160 * @param in 161 * input stream 162 * @return the decoded integer 163 * @throws IOException 164 * 165 * @see Utils#readVLong(DataInput) 166 */ readVInt(DataInput in)167 public static int readVInt(DataInput in) throws IOException { 168 long ret = readVLong(in); 169 if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) { 170 throw new RuntimeException( 171 "Number too large to be represented as Integer"); 172 } 173 return (int) ret; 174 } 175 176 /** 177 * Decoding the variable-length integer. Suppose the value of the first byte 178 * is FB, and the following bytes are NB[*]. 179 * <ul> 180 * <li>if (FB >= -32), return (long)FB; 181 * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff; 182 * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 + 183 * NB[1]&0xff; 184 * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 + 185 * (NB[1]&0xff)<<8 + NB[2]&0xff; 186 * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed 187 * big-endian integer. 188 * 189 * @param in 190 * input stream 191 * @return the decoded long integer. 192 * @throws IOException 193 */ 194 readVLong(DataInput in)195 public static long readVLong(DataInput in) throws IOException { 196 int firstByte = in.readByte(); 197 if (firstByte >= -32) { 198 return firstByte; 199 } 200 201 switch ((firstByte + 128) / 8) { 202 case 11: 203 case 10: 204 case 9: 205 case 8: 206 case 7: 207 return ((firstByte + 52) << 8) | in.readUnsignedByte(); 208 case 6: 209 case 5: 210 case 4: 211 case 3: 212 return ((firstByte + 88) << 16) | in.readUnsignedShort(); 213 case 2: 214 case 1: 215 return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8) 216 | in.readUnsignedByte(); 217 case 0: 218 int len = firstByte + 129; 219 switch (len) { 220 case 4: 221 return in.readInt(); 222 case 5: 223 return ((long) in.readInt()) << 8 | in.readUnsignedByte(); 224 case 6: 225 return ((long) in.readInt()) << 16 | in.readUnsignedShort(); 226 case 7: 227 return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8) 228 | in.readUnsignedByte(); 229 case 8: 230 return in.readLong(); 231 default: 232 throw new IOException("Corrupted VLong encoding"); 233 } 234 default: 235 throw new RuntimeException("Internal error"); 236 } 237 } 238 239 /** 240 * Write a String as a VInt n, followed by n Bytes as in Text format. 241 * 242 * @param out 243 * @param s 244 * @throws IOException 245 */ writeString(DataOutput out, String s)246 public static void writeString(DataOutput out, String s) throws IOException { 247 if (s != null) { 248 Text text = new Text(s); 249 byte[] buffer = text.getBytes(); 250 int len = text.getLength(); 251 writeVInt(out, len); 252 out.write(buffer, 0, len); 253 } else { 254 writeVInt(out, -1); 255 } 256 } 257 258 /** 259 * Read a String as a VInt n, followed by n Bytes in Text format. 260 * 261 * @param in 262 * The input stream. 263 * @return The string 264 * @throws IOException 265 */ readString(DataInput in)266 public static String readString(DataInput in) throws IOException { 267 int length = readVInt(in); 268 if (length == -1) return null; 269 byte[] buffer = new byte[length]; 270 in.readFully(buffer); 271 return Text.decode(buffer); 272 } 273 274 /** 275 * A generic Version class. We suggest applications built on top of TFile use 276 * this class to maintain version information in their meta blocks. 277 * 278 * A version number consists of a major version and a minor version. The 279 * suggested usage of major and minor version number is to increment major 280 * version number when the new storage format is not backward compatible, and 281 * increment the minor version otherwise. 282 */ 283 public static final class Version implements Comparable<Version> { 284 private final short major; 285 private final short minor; 286 287 /** 288 * Construct the Version object by reading from the input stream. 289 * 290 * @param in 291 * input stream 292 * @throws IOException 293 */ Version(DataInput in)294 public Version(DataInput in) throws IOException { 295 major = in.readShort(); 296 minor = in.readShort(); 297 } 298 299 /** 300 * Constructor. 301 * 302 * @param major 303 * major version. 304 * @param minor 305 * minor version. 306 */ Version(short major, short minor)307 public Version(short major, short minor) { 308 this.major = major; 309 this.minor = minor; 310 } 311 312 /** 313 * Write the objec to a DataOutput. The serialized format of the Version is 314 * major version followed by minor version, both as big-endian short 315 * integers. 316 * 317 * @param out 318 * The DataOutput object. 319 * @throws IOException 320 */ write(DataOutput out)321 public void write(DataOutput out) throws IOException { 322 out.writeShort(major); 323 out.writeShort(minor); 324 } 325 326 /** 327 * Get the major version. 328 * 329 * @return Major version. 330 */ getMajor()331 public int getMajor() { 332 return major; 333 } 334 335 /** 336 * Get the minor version. 337 * 338 * @return The minor version. 339 */ getMinor()340 public int getMinor() { 341 return minor; 342 } 343 344 /** 345 * Get the size of the serialized Version object. 346 * 347 * @return serialized size of the version object. 348 */ size()349 public static int size() { 350 return (Short.SIZE + Short.SIZE) / Byte.SIZE; 351 } 352 353 /** 354 * Return a string representation of the version. 355 */ toString()356 public String toString() { 357 return new StringBuilder("v").append(major).append(".").append(minor) 358 .toString(); 359 } 360 361 /** 362 * Test compatibility. 363 * 364 * @param other 365 * The Version object to test compatibility with. 366 * @return true if both versions have the same major version number; false 367 * otherwise. 368 */ compatibleWith(Version other)369 public boolean compatibleWith(Version other) { 370 return major == other.major; 371 } 372 373 /** 374 * Compare this version with another version. 375 */ 376 @Override compareTo(Version that)377 public int compareTo(Version that) { 378 if (major != that.major) { 379 return major - that.major; 380 } 381 return minor - that.minor; 382 } 383 384 @Override equals(Object other)385 public boolean equals(Object other) { 386 if (this == other) return true; 387 if (!(other instanceof Version)) return false; 388 return compareTo((Version) other) == 0; 389 } 390 391 @Override hashCode()392 public int hashCode() { 393 return (major << 16 + minor); 394 } 395 } 396 397 /** 398 * Lower bound binary search. Find the index to the first element in the list 399 * that compares greater than or equal to key. 400 * 401 * @param <T> 402 * Type of the input key. 403 * @param list 404 * The list 405 * @param key 406 * The input key. 407 * @param cmp 408 * Comparator for the key. 409 * @return The index to the desired element if it exists; or list.size() 410 * otherwise. 411 */ lowerBound(List<? extends T> list, T key, Comparator<? super T> cmp)412 public static <T> int lowerBound(List<? extends T> list, T key, 413 Comparator<? super T> cmp) { 414 int low = 0; 415 int high = list.size(); 416 417 while (low < high) { 418 int mid = (low + high) >>> 1; 419 T midVal = list.get(mid); 420 int ret = cmp.compare(midVal, key); 421 if (ret < 0) 422 low = mid + 1; 423 else high = mid; 424 } 425 return low; 426 } 427 428 /** 429 * Upper bound binary search. Find the index to the first element in the list 430 * that compares greater than the input key. 431 * 432 * @param <T> 433 * Type of the input key. 434 * @param list 435 * The list 436 * @param key 437 * The input key. 438 * @param cmp 439 * Comparator for the key. 440 * @return The index to the desired element if it exists; or list.size() 441 * otherwise. 442 */ upperBound(List<? extends T> list, T key, Comparator<? super T> cmp)443 public static <T> int upperBound(List<? extends T> list, T key, 444 Comparator<? super T> cmp) { 445 int low = 0; 446 int high = list.size(); 447 448 while (low < high) { 449 int mid = (low + high) >>> 1; 450 T midVal = list.get(mid); 451 int ret = cmp.compare(midVal, key); 452 if (ret <= 0) 453 low = mid + 1; 454 else high = mid; 455 } 456 return low; 457 } 458 459 /** 460 * Lower bound binary search. Find the index to the first element in the list 461 * that compares greater than or equal to key. 462 * 463 * @param <T> 464 * Type of the input key. 465 * @param list 466 * The list 467 * @param key 468 * The input key. 469 * @return The index to the desired element if it exists; or list.size() 470 * otherwise. 471 */ lowerBound(List<? extends Comparable<? super T>> list, T key)472 public static <T> int lowerBound(List<? extends Comparable<? super T>> list, 473 T key) { 474 int low = 0; 475 int high = list.size(); 476 477 while (low < high) { 478 int mid = (low + high) >>> 1; 479 Comparable<? super T> midVal = list.get(mid); 480 int ret = midVal.compareTo(key); 481 if (ret < 0) 482 low = mid + 1; 483 else high = mid; 484 } 485 return low; 486 } 487 488 /** 489 * Upper bound binary search. Find the index to the first element in the list 490 * that compares greater than the input key. 491 * 492 * @param <T> 493 * Type of the input key. 494 * @param list 495 * The list 496 * @param key 497 * The input key. 498 * @return The index to the desired element if it exists; or list.size() 499 * otherwise. 500 */ upperBound(List<? extends Comparable<? super T>> list, T key)501 public static <T> int upperBound(List<? extends Comparable<? super T>> list, 502 T key) { 503 int low = 0; 504 int high = list.size(); 505 506 while (low < high) { 507 int mid = (low + high) >>> 1; 508 Comparable<? super T> midVal = list.get(mid); 509 int ret = midVal.compareTo(key); 510 if (ret <= 0) 511 low = mid + 1; 512 else high = mid; 513 } 514 return low; 515 } 516 } 517