1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.util; 20 21 public class UTF8ByteArrayUtils { 22 /** 23 * Find the first occurrence of the given byte b in a UTF-8 encoded string 24 * @param utf a byte array containing a UTF-8 encoded string 25 * @param start starting offset 26 * @param end ending position 27 * @param b the byte to find 28 * @return position that first byte occures otherwise -1 29 */ findByte(byte [] utf, int start, int end, byte b)30 public static int findByte(byte [] utf, int start, int end, byte b) { 31 for(int i=start; i<end; i++) { 32 if (utf[i]==b) { 33 return i; 34 } 35 } 36 return -1; 37 } 38 39 /** 40 * Find the first occurrence of the given bytes b in a UTF-8 encoded string 41 * @param utf a byte array containing a UTF-8 encoded string 42 * @param start starting offset 43 * @param end ending position 44 * @param b the bytes to find 45 * @return position that first byte occures otherwise -1 46 */ findBytes(byte [] utf, int start, int end, byte[] b)47 public static int findBytes(byte [] utf, int start, int end, byte[] b) { 48 int matchEnd = end - b.length; 49 for(int i=start; i<=matchEnd; i++) { 50 boolean matched = true; 51 for(int j=0; j<b.length; j++) { 52 if (utf[i+j] != b[j]) { 53 matched = false; 54 break; 55 } 56 } 57 if (matched) { 58 return i; 59 } 60 } 61 return -1; 62 } 63 64 /** 65 * Find the nth occurrence of the given byte b in a UTF-8 encoded string 66 * @param utf a byte array containing a UTF-8 encoded string 67 * @param start starting offset 68 * @param length the length of byte array 69 * @param b the byte to find 70 * @param n the desired occurrence of the given byte 71 * @return position that nth occurrence of the given byte if exists; otherwise -1 72 */ findNthByte(byte [] utf, int start, int length, byte b, int n)73 public static int findNthByte(byte [] utf, int start, int length, byte b, int n) { 74 int pos = -1; 75 int nextStart = start; 76 for (int i = 0; i < n; i++) { 77 pos = findByte(utf, nextStart, length, b); 78 if (pos < 0) { 79 return pos; 80 } 81 nextStart = pos + 1; 82 } 83 return pos; 84 } 85 86 /** 87 * Find the nth occurrence of the given byte b in a UTF-8 encoded string 88 * @param utf a byte array containing a UTF-8 encoded string 89 * @param b the byte to find 90 * @param n the desired occurrence of the given byte 91 * @return position that nth occurrence of the given byte if exists; otherwise -1 92 */ findNthByte(byte [] utf, byte b, int n)93 public static int findNthByte(byte [] utf, byte b, int n) { 94 return findNthByte(utf, 0, utf.length, b, n); 95 } 96 97 } 98 99