1 /* 2 * $Id: Perl5MatchResult.java,v 1.8 2003/11/07 20:16:25 dfs Exp $ 3 * 4 * ==================================================================== 5 * The Apache Software License, Version 1.1 6 * 7 * Copyright (c) 2000 The Apache Software Foundation. All rights 8 * reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 22 * 3. The end-user documentation included with the redistribution, 23 * if any, must include the following acknowledgment: 24 * "This product includes software developed by the 25 * Apache Software Foundation (http://www.apache.org/)." 26 * Alternately, this acknowledgment may appear in the software itself, 27 * if and wherever such third-party acknowledgments normally appear. 28 * 29 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 30 * must not be used to endorse or promote products derived from this 31 * software without prior written permission. For written 32 * permission, please contact apache@apache.org. 33 * 34 * 5. Products derived from this software may not be called "Apache" 35 * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 36 * name, without prior written permission of the Apache Software Foundation. 37 * 38 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 40 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 41 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 45 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 46 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 47 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 48 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 49 * SUCH DAMAGE. 50 * ==================================================================== 51 * 52 * This software consists of voluntary contributions made by many 53 * individuals on behalf of the Apache Software Foundation. For more 54 * information on the Apache Software Foundation, please see 55 * <http://www.apache.org/>. 56 */ 57 58 59 package org.apache.oro.text.regex; 60 61 62 /** 63 * A class used to store and access the results of a Perl5Pattern match. 64 * 65 * @version @version@ 66 * @since 1.0 67 * @see PatternMatcher 68 * @see Perl5Matcher 69 */ 70 final class Perl5MatchResult implements MatchResult { 71 /** 72 * The character offset into the line or stream where the match 73 * begins. Pattern matching methods that look for matches a line at 74 * a time should use this field as the offset into the line 75 * of the match. Methods that look for matches independent of line 76 * boundaries should use this field as the offset into the entire 77 * text stream. 78 */ 79 int _matchBeginOffset; 80 81 82 /** 83 * Arrays containing the beginning and end offsets of the pattern 84 * groups matched within the actual matched pattern contained in the 85 * variable <code>match</code>. 86 * Pattern matching methods that do not match subgroups, will only contain 87 * entries for group 0, which always refers to the entire pattern. 88 * <code>beginGroupOffset</code> contains the start offset of the groups, 89 * indexed by group number, which will always be 0 for group 0. 90 * <code>endGroupOffset</code> contains the ending offset + 1 of the groups. 91 * A group matching the null string will have <code>beginGroupOffset</code> 92 * and <code>endGroupOffset</code> entries of equal value. Following a 93 * convention established by the GNU regular expression library for the 94 * C language, groups that are not part of a match contain -1 as their 95 * begin and end offsets. 96 */ 97 int[] _beginGroupOffset, _endGroupOffset; 98 99 100 /** 101 * The entire string that matched the pattern. 102 */ 103 String _match; 104 105 106 /** 107 * Constructs a MatchResult able to store match information for 108 * a number of subpattern groups. 109 * <p> 110 * @param groups The number of groups this MatchResult can store. 111 * Only postitive values greater than or equal to 1 make any 112 * sense. At minimum, a MatchResult stores one group which 113 * represents the entire pattern matched including all subparts. 114 */ Perl5MatchResult(int groups)115 Perl5MatchResult(int groups){ 116 _beginGroupOffset = new int[groups]; 117 _endGroupOffset = new int[groups]; 118 } 119 120 121 /** 122 * @return The length of the match. 123 */ length()124 public int length(){ 125 int length; 126 127 length = (_endGroupOffset[0] - _beginGroupOffset[0]); 128 129 return (length > 0 ? length : 0); 130 } 131 132 133 /** 134 * @return The number of groups contained in the result. This number 135 * includes the 0th group. In other words, the result refers 136 * to the number of parenthesized subgroups plus the entire match 137 * itself. 138 */ groups()139 public int groups(){ 140 return _beginGroupOffset.length; 141 } 142 143 /** 144 * @param group The pattern subgroup to return. 145 * @return A string containing the indicated pattern subgroup. Group 146 * 0 always refers to the entire match. If a group was never 147 * matched, it returns null. This is not to be confused with 148 * a group matching the null string, which will return a String 149 * of length 0. 150 */ group(int group)151 public String group(int group){ 152 int begin, end, length; 153 154 if(group < _beginGroupOffset.length){ 155 begin = _beginGroupOffset[group]; 156 end = _endGroupOffset[group]; 157 length = _match.length(); 158 159 if(begin >= 0 && end >= 0) { 160 if(begin < length && end <= length && end > begin) 161 return _match.substring(begin, end); 162 else if(begin <= end) 163 return ""; 164 } 165 } 166 167 return null; 168 } 169 170 /** 171 * @param group The pattern subgroup. 172 * @return The offset into group 0 of the first token in the indicated 173 * pattern subgroup. If a group was never matched or does 174 * not exist, returns -1. 175 */ begin(int group)176 public int begin(int group){ 177 int begin, end;//, length; 178 if(group < _beginGroupOffset.length){ 179 begin = _beginGroupOffset[group]; 180 end = _endGroupOffset[group]; 181 //length = _match.length(); 182 if(begin >= 0 && end >= 0)// && begin < length && end <= length) 183 //return _beginGroupOffset[group]; 184 return begin; 185 } 186 187 return -1; 188 } 189 190 /** 191 * @param group The pattern subgroup. 192 * @return Returns one plus the offset into group 0 of the last token in 193 * the indicated pattern subgroup. If a group was never matched 194 * or does not exist, returns -1. A group matching the null 195 * string will return its start offset. 196 */ end(int group)197 public int end(int group){ 198 int begin, end; //, length; 199 if(group < _beginGroupOffset.length){ 200 begin = _beginGroupOffset[group]; 201 end = _endGroupOffset[group]; 202 //length = _match.length(); 203 if(begin >= 0 && end >= 0)// && begin < length && end <= length) 204 //return _endGroupOffset[group]; 205 return end; 206 } 207 return -1; 208 } 209 210 /** 211 * Returns an offset marking the beginning of the pattern match 212 * relative to the beginning of the input. 213 * <p> 214 * @param group The pattern subgroup. 215 * @return The offset of the first token in the indicated 216 * pattern subgroup. If a group was never matched or does 217 * not exist, returns -1. 218 */ beginOffset(int group)219 public int beginOffset(int group){ 220 int begin, end;//, length; 221 if(group < _beginGroupOffset.length){ 222 begin = _beginGroupOffset[group]; 223 end = _endGroupOffset[group]; 224 //length = _match.length(); 225 if(begin >= 0 && end >= 0)// && begin < length && end <= length) 226 //return _matchBeginOffset + _beginGroupOffset[group]; 227 return _matchBeginOffset + begin; 228 } 229 return -1; 230 } 231 232 /** 233 * Returns an offset marking the end of the pattern match 234 * relative to the beginning of the input. 235 * <p> 236 * @param group The pattern subgroup. 237 * @return Returns one plus the offset of the last token in 238 * the indicated pattern subgroup. If a group was never matched 239 * or does not exist, returns -1. A group matching the null 240 * string will return its start offset. 241 */ endOffset(int group)242 public int endOffset(int group){ 243 int begin, end;//, length; 244 if(group < _endGroupOffset.length){ 245 begin = _beginGroupOffset[group]; 246 end = _endGroupOffset[group]; 247 //length = _match.length(); 248 if(begin >= 0 && end >= 0)// && begin < length && end <= length) 249 //return _matchBeginOffset + _endGroupOffset[group]; 250 return _matchBeginOffset + end; 251 } 252 return -1; 253 } 254 255 256 /** 257 * The same as group(0). 258 * 259 * @return A string containing the entire match. 260 */ toString()261 public String toString() { 262 return group(0); 263 } 264 } 265