1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.hbase; 20 21 import java.io.Serializable; 22 import java.util.Comparator; 23 24 import org.apache.hadoop.hbase.KeyValue.Type; 25 import org.apache.hadoop.hbase.classification.InterfaceAudience; 26 import org.apache.hadoop.hbase.classification.InterfaceStability; 27 import org.apache.hadoop.hbase.util.Bytes; 28 29 import com.google.common.primitives.Longs; 30 31 /** 32 * Compare two HBase cells. Do not use this method comparing <code>-ROOT-</code> or 33 * <code>hbase:meta</code> cells. Cells from these tables need a specialized comparator, one that 34 * takes account of the special formatting of the row where we have commas to delimit table from 35 * regionname, from row. See KeyValue for how it has a special comparator to do hbase:meta cells 36 * and yet another for -ROOT-. 37 */ 38 @edu.umd.cs.findbugs.annotations.SuppressWarnings( 39 value="UNKNOWN", 40 justification="Findbugs doesn't like the way we are negating the result of a compare in below") 41 @InterfaceAudience.Private 42 @InterfaceStability.Evolving 43 public class CellComparator implements Comparator<Cell>, Serializable { 44 private static final long serialVersionUID = -8760041766259623329L; 45 46 @Override compare(Cell a, Cell b)47 public int compare(Cell a, Cell b) { 48 return compare(a, b, false); 49 } 50 51 /** 52 * Compare cells. 53 * TODO: Replace with dynamic rather than static comparator so can change comparator 54 * implementation. 55 * @param a 56 * @param b 57 * @param ignoreSequenceid True if we are to compare the key portion only and ignore 58 * the sequenceid. Set to false to compare key and consider sequenceid. 59 * @return 0 if equal, -1 if a < b, and +1 if a > b. 60 */ compare(final Cell a, final Cell b, boolean ignoreSequenceid)61 public static int compare(final Cell a, final Cell b, boolean ignoreSequenceid) { 62 // row 63 int c = compareRows(a, b); 64 if (c != 0) return c; 65 66 c = compareWithoutRow(a, b); 67 if(c != 0) return c; 68 69 if (!ignoreSequenceid) { 70 // Negate following comparisons so later edits show up first 71 // mvccVersion: later sorts first 72 return Longs.compare(b.getMvccVersion(), a.getMvccVersion()); 73 } else { 74 return c; 75 } 76 } 77 findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix)78 public static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) { 79 return findCommonPrefix(left.getRowArray(), right.getRowArray(), left.getRowLength() 80 - rowCommonPrefix, right.getRowLength() - rowCommonPrefix, left.getRowOffset() 81 + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix); 82 } 83 findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, int leftOffset, int rightOffset)84 private static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, 85 int leftOffset, int rightOffset) { 86 int length = Math.min(leftLength, rightLength); 87 int result = 0; 88 89 while (result < length && left[leftOffset + result] == right[rightOffset + result]) { 90 result++; 91 } 92 return result; 93 } 94 findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix)95 public static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) { 96 return findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(), left.getFamilyLength() 97 - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix, left.getFamilyOffset() 98 + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix); 99 } 100 findCommonPrefixInQualifierPart(Cell left, Cell right, int qualifierCommonPrefix)101 public static int findCommonPrefixInQualifierPart(Cell left, Cell right, 102 int qualifierCommonPrefix) { 103 return findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(), 104 left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierLength() 105 - qualifierCommonPrefix, left.getQualifierOffset() + qualifierCommonPrefix, 106 right.getQualifierOffset() + qualifierCommonPrefix); 107 } 108 109 /**************** equals ****************************/ 110 equals(Cell a, Cell b)111 public static boolean equals(Cell a, Cell b){ 112 return equalsRow(a, b) 113 && equalsFamily(a, b) 114 && equalsQualifier(a, b) 115 && equalsTimestamp(a, b) 116 && equalsType(a, b); 117 } 118 equalsRow(Cell a, Cell b)119 public static boolean equalsRow(Cell a, Cell b){ 120 return Bytes.equals( 121 a.getRowArray(), a.getRowOffset(), a.getRowLength(), 122 b.getRowArray(), b.getRowOffset(), b.getRowLength()); 123 } 124 equalsFamily(Cell a, Cell b)125 public static boolean equalsFamily(Cell a, Cell b){ 126 return Bytes.equals( 127 a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), 128 b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); 129 } 130 equalsQualifier(Cell a, Cell b)131 public static boolean equalsQualifier(Cell a, Cell b){ 132 return Bytes.equals( 133 a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), 134 b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); 135 } 136 equalsTimestamp(Cell a, Cell b)137 public static boolean equalsTimestamp(Cell a, Cell b){ 138 return a.getTimestamp() == b.getTimestamp(); 139 } 140 equalsType(Cell a, Cell b)141 public static boolean equalsType(Cell a, Cell b){ 142 return a.getTypeByte() == b.getTypeByte(); 143 } 144 compareColumns(final Cell left, final Cell right)145 public static int compareColumns(final Cell left, final Cell right) { 146 int lfoffset = left.getFamilyOffset(); 147 int rfoffset = right.getFamilyOffset(); 148 int lclength = left.getQualifierLength(); 149 int rclength = right.getQualifierLength(); 150 int lfamilylength = left.getFamilyLength(); 151 int rfamilylength = right.getFamilyLength(); 152 int diff = compare(left.getFamilyArray(), lfoffset, lfamilylength, right.getFamilyArray(), 153 rfoffset, rfamilylength); 154 if (diff != 0) { 155 return diff; 156 } else { 157 return compare(left.getQualifierArray(), left.getQualifierOffset(), lclength, 158 right.getQualifierArray(), right.getQualifierOffset(), rclength); 159 } 160 } 161 compareFamilies(Cell left, Cell right)162 public static int compareFamilies(Cell left, Cell right) { 163 return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(), 164 right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength()); 165 } 166 compareQualifiers(Cell left, Cell right)167 public static int compareQualifiers(Cell left, Cell right) { 168 return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset(), 169 left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(), 170 right.getQualifierLength()); 171 } 172 compareFlatKey(Cell left, Cell right)173 public int compareFlatKey(Cell left, Cell right) { 174 int compare = compareRows(left, right); 175 if (compare != 0) { 176 return compare; 177 } 178 return compareWithoutRow(left, right); 179 } 180 181 /** 182 * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) 183 * so can't be treated as plain byte arrays as this method does. 184 */ compareRows(final Cell left, final Cell right)185 public static int compareRows(final Cell left, final Cell right) { 186 return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(), 187 right.getRowArray(), right.getRowOffset(), right.getRowLength()); 188 } 189 190 /** 191 * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) 192 * so can't be treated as plain byte arrays as this method does. 193 */ compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset, int rlength)194 public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset, 195 int rlength) { 196 return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); 197 } 198 compareWithoutRow(final Cell leftCell, final Cell rightCell)199 public static int compareWithoutRow(final Cell leftCell, final Cell rightCell) { 200 // If the column is not specified, the "minimum" key type appears the 201 // latest in the sorted order, regardless of the timestamp. This is used 202 // for specifying the last key/value in a given row, because there is no 203 // "lexicographically last column" (it would be infinitely long). The 204 // "maximum" key type does not need this behavior. 205 // Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this. 206 // TODO 207 if (leftCell.getFamilyLength() + leftCell.getQualifierLength() == 0 208 && leftCell.getTypeByte() == Type.Minimum.getCode()) { 209 // left is "bigger", i.e. it appears later in the sorted order 210 return 1; 211 } 212 if (rightCell.getFamilyLength() + rightCell.getQualifierLength() == 0 213 && rightCell.getTypeByte() == Type.Minimum.getCode()) { 214 return -1; 215 } 216 boolean sameFamilySize = (leftCell.getFamilyLength() == rightCell.getFamilyLength()); 217 if (!sameFamilySize) { 218 // comparing column family is enough. 219 220 return Bytes.compareTo(leftCell.getFamilyArray(), leftCell.getFamilyOffset(), 221 leftCell.getFamilyLength(), rightCell.getFamilyArray(), rightCell.getFamilyOffset(), 222 rightCell.getFamilyLength()); 223 } 224 int diff = compareColumns(leftCell, rightCell); 225 if (diff != 0) return diff; 226 227 diff = compareTimestamps(leftCell, rightCell); 228 if (diff != 0) return diff; 229 230 // Compare types. Let the delete types sort ahead of puts; i.e. types 231 // of higher numbers sort before those of lesser numbers. Maximum (255) 232 // appears ahead of everything, and minimum (0) appears after 233 // everything. 234 return (0xff & rightCell.getTypeByte()) - (0xff & leftCell.getTypeByte()); 235 } 236 237 /** 238 * Compares cell's timestamps in DESCENDING order. 239 * The below older timestamps sorting ahead of newer timestamps looks 240 * wrong but it is intentional. This way, newer timestamps are first 241 * found when we iterate over a memstore and newer versions are the 242 * first we trip over when reading from a store file. 243 * @return 1 if left's timestamp < right's timestamp 244 * -1 if left's timestamp > right's timestamp 245 * 0 if both timestamps are equal 246 */ compareTimestamps(final Cell left, final Cell right)247 public static int compareTimestamps(final Cell left, final Cell right) { 248 return compareTimestamps(left.getTimestamp(), right.getTimestamp()); 249 } 250 251 /********************* hashCode ************************/ 252 253 /** 254 * Returns a hash code that is always the same for two Cells having a matching equals(..) result. 255 */ hashCode(Cell cell)256 public static int hashCode(Cell cell){ 257 if (cell == null) {// return 0 for empty Cell 258 return 0; 259 } 260 261 int hash = calculateHashForKeyValue(cell); 262 hash = 31 * hash + (int)cell.getMvccVersion(); 263 return hash; 264 } 265 266 /** 267 * Returns a hash code that is always the same for two Cells having a matching 268 * equals(..) result. Note : Ignore mvcc while calculating the hashcode 269 * 270 * @param cell 271 * @return hashCode 272 */ hashCodeIgnoreMvcc(Cell cell)273 public static int hashCodeIgnoreMvcc(Cell cell) { 274 if (cell == null) {// return 0 for empty Cell 275 return 0; 276 } 277 278 int hash = calculateHashForKeyValue(cell); 279 return hash; 280 } 281 calculateHashForKeyValue(Cell cell)282 private static int calculateHashForKeyValue(Cell cell) { 283 //pre-calculate the 3 hashes made of byte ranges 284 int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); 285 int familyHash = 286 Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); 287 int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), 288 cell.getQualifierLength()); 289 290 //combine the 6 sub-hashes 291 int hash = 31 * rowHash + familyHash; 292 hash = 31 * hash + qualifierHash; 293 hash = 31 * hash + (int)cell.getTimestamp(); 294 hash = 31 * hash + cell.getTypeByte(); 295 return hash; 296 } 297 298 299 /******************** lengths *************************/ 300 areKeyLengthsEqual(Cell a, Cell b)301 public static boolean areKeyLengthsEqual(Cell a, Cell b) { 302 return a.getRowLength() == b.getRowLength() 303 && a.getFamilyLength() == b.getFamilyLength() 304 && a.getQualifierLength() == b.getQualifierLength(); 305 } 306 areRowLengthsEqual(Cell a, Cell b)307 public static boolean areRowLengthsEqual(Cell a, Cell b) { 308 return a.getRowLength() == b.getRowLength(); 309 } 310 311 312 /*********************common prefixes*************************/ 313 compare(byte[] left, int leftOffset, int leftLength, byte[] right, int rightOffset, int rightLength)314 private static int compare(byte[] left, int leftOffset, int leftLength, byte[] right, 315 int rightOffset, int rightLength) { 316 return Bytes.compareTo(left, leftOffset, leftLength, right, rightOffset, rightLength); 317 } 318 compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix)319 public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) { 320 return compare(left.getRowArray(), left.getRowOffset() + rowCommonPrefix, left.getRowLength() 321 - rowCommonPrefix, right.getRowArray(), right.getRowOffset() + rowCommonPrefix, 322 right.getRowLength() - rowCommonPrefix); 323 } 324 compareCommonFamilyPrefix(Cell left, Cell right, int familyCommonPrefix)325 public static int compareCommonFamilyPrefix(Cell left, Cell right, 326 int familyCommonPrefix) { 327 return compare(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix, 328 left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(), 329 right.getFamilyOffset() + familyCommonPrefix, 330 right.getFamilyLength() - familyCommonPrefix); 331 } 332 compareCommonQualifierPrefix(Cell left, Cell right, int qualCommonPrefix)333 public static int compareCommonQualifierPrefix(Cell left, Cell right, 334 int qualCommonPrefix) { 335 return compare(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix, 336 left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(), 337 right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength() 338 - qualCommonPrefix); 339 } 340 341 /***************** special cases ****************************/ 342 /** 343 * special case for KeyValue.equals 344 */ equalsIgnoreMvccVersion(Cell a, Cell b)345 public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){ 346 return 0 == compareStaticIgnoreMvccVersion(a, b); 347 } 348 compareStaticIgnoreMvccVersion(Cell a, Cell b)349 private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) { 350 // row 351 int c = compareRows(a, b); 352 if (c != 0) return c; 353 354 // family 355 c = compareColumns(a, b); 356 if (c != 0) return c; 357 358 // timestamp: later sorts first 359 c = compareTimestamps(a, b); 360 if (c != 0) return c; 361 362 //type 363 c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte()); 364 return c; 365 } 366 367 /** 368 * Compares timestamps in DESCENDING order. 369 * The below older timestamps sorting ahead of newer timestamps looks 370 * wrong but it is intentional. This way, newer timestamps are first 371 * found when we iterate over a memstore and newer versions are the 372 * first we trip over when reading from a store file. 373 * @return 1 if left timestamp < right timestamp 374 * -1 if left timestamp > right timestamp 375 * 0 if both timestamps are equal 376 */ compareTimestamps(final long ltimestamp, final long rtimestamp)377 private static int compareTimestamps(final long ltimestamp, final long rtimestamp) { 378 if (ltimestamp < rtimestamp) { 379 return 1; 380 } else if (ltimestamp > rtimestamp) { 381 return -1; 382 } 383 return 0; 384 } 385 386 /** 387 * Counter part for the KeyValue.RowOnlyComparator 388 */ 389 public static class RowComparator extends CellComparator { 390 @Override compare(Cell a, Cell b)391 public int compare(Cell a, Cell b) { 392 return compareRows(a, b); 393 } 394 } 395 396 /** 397 * Try to return a Cell that falls between <code>left</code> and <code>right</code> but that is 398 * shorter; i.e. takes up less space. This trick is used building HFile block index. 399 * Its an optimization. It does not always work. In this case we'll just return the 400 * <code>right</code> cell. 401 * @param comparator Comparator to use. 402 * @param left 403 * @param right 404 * @return A cell that sorts between <code>left</code> and <code>right</code>. 405 */ getMidpoint(final KeyValue.KVComparator comparator, final Cell left, final Cell right)406 public static Cell getMidpoint(final KeyValue.KVComparator comparator, final Cell left, 407 final Cell right) { 408 // TODO: Redo so only a single pass over the arrays rather than one to compare and then a 409 // second composing midpoint. 410 if (right == null) { 411 throw new IllegalArgumentException("right cell can not be null"); 412 } 413 if (left == null) { 414 return right; 415 } 416 // If Cells from meta table, don't mess around. meta table Cells have schema 417 // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip out without 418 // trying to do this optimization. 419 if (comparator != null && comparator instanceof KeyValue.MetaComparator) { 420 return right; 421 } 422 int diff = compareRows(left, right); 423 if (diff > 0) { 424 throw new IllegalArgumentException("Left row sorts after right row; left=" + 425 CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 426 } 427 if (diff < 0) { 428 // Left row is < right row. 429 byte [] midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(), 430 left.getRowLength(), 431 right.getRowArray(), right.getRowOffset(), right.getRowLength()); 432 // If midRow is null, just return 'right'. Can't do optimization. 433 if (midRow == null) return right; 434 return CellUtil.createCell(midRow); 435 } 436 // Rows are same. Compare on families. 437 diff = compareFamilies(left, right); 438 if (diff > 0) { 439 throw new IllegalArgumentException("Left family sorts after right family; left=" + 440 CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 441 } 442 if (diff < 0) { 443 byte [] midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(), 444 left.getFamilyLength(), 445 right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength()); 446 // If midRow is null, just return 'right'. Can't do optimization. 447 if (midRow == null) return right; 448 // Return new Cell where we use right row and then a mid sort family. 449 return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(), 450 midRow, 0, midRow.length, HConstants.EMPTY_BYTE_ARRAY, 0, 451 HConstants.EMPTY_BYTE_ARRAY.length); 452 } 453 // Families are same. Compare on qualifiers. 454 diff = compareQualifiers(left, right); 455 if (diff > 0) { 456 throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left=" + 457 CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 458 } 459 if (diff < 0) { 460 byte [] midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(), 461 left.getQualifierLength(), 462 right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength()); 463 // If midRow is null, just return 'right'. Can't do optimization. 464 if (midRow == null) return right; 465 // Return new Cell where we use right row and family and then a mid sort qualifier. 466 return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(), 467 right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength(), 468 midRow, 0, midRow.length); 469 } 470 // No opportunity for optimization. Just return right key. 471 return right; 472 } 473 474 /** 475 * @param leftArray 476 * @param leftOffset 477 * @param leftLength 478 * @param rightArray 479 * @param rightOffset 480 * @param rightLength 481 * @return Return a new array that is between left and right and minimally sized else just return 482 * null as indicator that we could not create a mid point. 483 */ getMinimumMidpointArray(final byte [] leftArray, final int leftOffset, final int leftLength, final byte [] rightArray, final int rightOffset, final int rightLength)484 private static byte [] getMinimumMidpointArray(final byte [] leftArray, final int leftOffset, 485 final int leftLength, 486 final byte [] rightArray, final int rightOffset, final int rightLength) { 487 // rows are different 488 int minLength = leftLength < rightLength ? leftLength : rightLength; 489 int diffIdx = 0; 490 while (diffIdx < minLength && 491 leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) { 492 diffIdx++; 493 } 494 byte [] minimumMidpointArray = null; 495 if (diffIdx >= minLength) { 496 // leftKey's row is prefix of rightKey's. 497 minimumMidpointArray = new byte[diffIdx + 1]; 498 System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); 499 } else { 500 int diffByte = leftArray[leftOffset + diffIdx]; 501 if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) { 502 minimumMidpointArray = new byte[diffIdx + 1]; 503 System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx); 504 minimumMidpointArray[diffIdx] = (byte) (diffByte + 1); 505 } else { 506 minimumMidpointArray = new byte[diffIdx + 1]; 507 System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); 508 } 509 } 510 return minimumMidpointArray; 511 } 512 } 513