1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 package org.apache.hadoop.util;
20 
public class UTF8ByteArrayUtils {
  /**
   * Scans a byte array holding a UTF-8 encoded string for a single byte.
   *
   * @param utf a byte array containing a UTF-8 encoded string
   * @param start index of the first byte to examine (inclusive)
   * @param end index at which the scan stops (exclusive)
   * @param b the byte to look for
   * @return the index of the first matching byte in {@code [start, end)},
   *         or -1 if the byte does not occur in that range
   */
  public static int findByte(byte [] utf, int start, int end, byte b) {
    int pos = start;
    while (pos < end) {
      if (utf[pos] == b) {
        return pos;
      }
      pos++;
    }
    return -1;
  }

  /**
   * Scans a byte array holding a UTF-8 encoded string for a byte sequence.
   * Only matches that lie completely inside {@code [start, end)} are
   * reported.
   *
   * @param utf a byte array containing a UTF-8 encoded string
   * @param start index of the first candidate match position (inclusive)
   * @param end index at which the searched region stops (exclusive)
   * @param b the byte sequence to look for
   * @return the index at which the first match begins, or -1 if the
   *         sequence does not occur in the searched region
   */
  public static int findBytes(byte [] utf, int start, int end, byte[] b) {
    final int patternLength = b.length;
    // Last index at which the whole pattern still fits before 'end'.
    final int lastCandidate = end - patternLength;
    candidates:
    for (int offset = start; offset <= lastCandidate; offset++) {
      for (int k = 0; k < patternLength; k++) {
        if (utf[offset + k] != b[k]) {
          // Mismatch: give up on this offset and try the next one.
          continue candidates;
        }
      }
      // Every pattern byte matched at this offset.
      return offset;
    }
    return -1;
  }

  /**
   * Locates the nth occurrence of a byte in a byte array holding a UTF-8
   * encoded string.
   *
   * @param utf a byte array containing a UTF-8 encoded string
   * @param start index of the first byte to examine (inclusive)
   * @param length index at which the scan stops (exclusive); it is handed
   *        to {@link #findByte(byte[], int, int, byte)} as the end position
   * @param b the byte to look for
   * @param n which occurrence is wanted, counting from 1
   * @return the index of the nth occurrence if there is one; otherwise -1
   *         (this includes the case where {@code n} is not positive)
   */
  public static int findNthByte(byte [] utf, int start, int length, byte b, int n) {
    int found = -1;
    int from = start;
    int remaining = n;
    while (remaining > 0) {
      found = findByte(utf, from, length, b);
      if (found < 0) {
        // Fewer than n occurrences exist in the searched range.
        return found;
      }
      // Resume the scan just past the occurrence that was located.
      from = found + 1;
      remaining--;
    }
    return found;
  }

  /**
   * Locates the nth occurrence of a byte, searching the entire array.
   *
   * @param utf a byte array containing a UTF-8 encoded string
   * @param b the byte to look for
   * @param n which occurrence is wanted, counting from 1
   * @return the index of the nth occurrence if there is one; otherwise -1
   */
  public static int findNthByte(byte [] utf, byte b, int n) {
    final int wholeLength = utf.length;
    return findNthByte(utf, 0, wholeLength, b, n);
  }

}
98 
99