1 /*
2  * $Id: Perl5MatchResult.java,v 1.8 2003/11/07 20:16:25 dfs Exp $
3  *
4  * ====================================================================
5  * The Apache Software License, Version 1.1
6  *
7  * Copyright (c) 2000 The Apache Software Foundation.  All rights
8  * reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  *
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in
19  *    the documentation and/or other materials provided with the
20  *    distribution.
21  *
22  * 3. The end-user documentation included with the redistribution,
23  *    if any, must include the following acknowledgment:
24  *       "This product includes software developed by the
25  *        Apache Software Foundation (http://www.apache.org/)."
26  *    Alternately, this acknowledgment may appear in the software itself,
27  *    if and wherever such third-party acknowledgments normally appear.
28  *
29  * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
30  *    must not be used to endorse or promote products derived from this
31  *    software without prior written permission. For written
32  *    permission, please contact apache@apache.org.
33  *
34  * 5. Products derived from this software may not be called "Apache"
35  *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
36  *    name, without prior written permission of the Apache Software Foundation.
37  *
38  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
39  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
40  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
41  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
42  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
45  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
46  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
47  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
48  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49  * SUCH DAMAGE.
50  * ====================================================================
51  *
52  * This software consists of voluntary contributions made by many
53  * individuals on behalf of the Apache Software Foundation.  For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */
57 
58 
59 package org.apache.oro.text.regex;
60 
61 
62 /**
63  * A class used to store and access the results of a Perl5Pattern match.
64  *
65  * @version @version@
66  * @since 1.0
67  * @see PatternMatcher
68  * @see Perl5Matcher
69  */
70 final class Perl5MatchResult implements MatchResult {
71   /**
72    * The character offset into the line or stream where the match
73    * begins.  Pattern matching methods that look for matches a line at
74    * a time should use this field as the offset into the line
75    * of the match.  Methods that look for matches independent of line
76    * boundaries should use this field as the offset into the entire
77    * text stream.
78    */
79    int _matchBeginOffset;
80 
81 
82   /**
83    * Arrays containing the beginning and end offsets of the pattern
84    * groups matched within the actual matched pattern contained in the
85    * variable <code>match</code>.
86    * Pattern matching methods that do not match subgroups, will only contain
87    * entries for group 0, which always refers to the entire pattern.
88    * <code>beginGroupOffset</code> contains the start offset of the groups,
89    * indexed by group number, which will always be 0 for group 0.
90    * <code>endGroupOffset</code> contains the ending offset + 1 of the groups.
91    * A group matching the null string will have  <code>beginGroupOffset</code>
92    * and <code>endGroupOffset</code> entries of equal value.  Following a
93    * convention established by the GNU regular expression library for the
94    * C language, groups that are not part of a match contain -1 as their
95    * begin and end offsets.
96    */
97    int[] _beginGroupOffset, _endGroupOffset;
98 
99 
100   /**
101    * The entire string that matched the pattern.
102    */
103    String _match;
104 
105 
106   /**
107    * Constructs a MatchResult able to store match information for
108    * a number of subpattern groups.
109    * <p>
110    * @param groups  The number of groups this MatchResult can store.
111    *        Only postitive values greater than or equal to 1 make any
112    *        sense.  At minimum, a MatchResult stores one group which
113    *        represents the entire pattern matched including all subparts.
114    */
Perl5MatchResult(int groups)115   Perl5MatchResult(int groups){
116     _beginGroupOffset = new int[groups];
117     _endGroupOffset   = new int[groups];
118   }
119 
120 
121   /**
122    * @return The length of the match.
123    */
length()124   public int length(){
125     int length;
126 
127     length = (_endGroupOffset[0] - _beginGroupOffset[0]);
128 
129     return (length > 0 ? length : 0);
130   }
131 
132 
133   /**
134    * @return The number of groups contained in the result.  This number
135    *         includes the 0th group.  In other words, the result refers
136    *         to the number of parenthesized subgroups plus the entire match
137    *         itself.
138    */
groups()139   public int groups(){
140     return _beginGroupOffset.length;
141   }
142 
143   /**
144    * @param group The pattern subgroup to return.
145    * @return A string containing the indicated pattern subgroup.  Group
146    *         0 always refers to the entire match.  If a group was never
147    *         matched, it returns null.  This is not to be confused with
148    *         a group matching the null string, which will return a String
149    *         of length 0.
150    */
group(int group)151   public String group(int group){
152     int begin, end, length;
153 
154     if(group < _beginGroupOffset.length){
155       begin  = _beginGroupOffset[group];
156       end    = _endGroupOffset[group];
157       length = _match.length();
158 
159       if(begin >= 0 && end >= 0) {
160 	if(begin < length && end <= length && end > begin)
161 	  return _match.substring(begin, end);
162 	else if(begin <= end)
163 	  return "";
164       }
165     }
166 
167     return null;
168   }
169 
170   /**
171    * @param group The pattern subgroup.
172    * @return The offset into group 0 of the first token in the indicated
173    *         pattern subgroup.  If a group was never matched or does
174    *         not exist, returns -1.
175    */
begin(int group)176   public int begin(int group){
177     int begin, end;//, length;
178     if(group < _beginGroupOffset.length){
179       begin  = _beginGroupOffset[group];
180       end    = _endGroupOffset[group];
181       //length = _match.length();
182       if(begin >= 0 && end >= 0)// && begin < length && end <= length)
183 	//return _beginGroupOffset[group];
184 	return begin;
185     }
186 
187     return -1;
188   }
189 
190   /**
191    * @param group The pattern subgroup.
192    * @return Returns one plus the offset into group 0 of the last token in
193    *         the indicated pattern subgroup.  If a group was never matched
194    *         or does not exist, returns -1.  A group matching the null
195    *         string will return its start offset.
196    */
end(int group)197   public int end(int group){
198     int begin, end; //, length;
199     if(group < _beginGroupOffset.length){
200       begin  = _beginGroupOffset[group];
201       end    = _endGroupOffset[group];
202       //length = _match.length();
203       if(begin >= 0 && end >= 0)// && begin < length && end <= length)
204 	//return _endGroupOffset[group];
205 	return end;
206     }
207     return -1;
208   }
209 
210   /**
211    * Returns an offset marking the beginning of the pattern match
212    * relative to the beginning of the input.
213    * <p>
214    * @param group The pattern subgroup.
215    * @return The offset of the first token in the indicated
216    *         pattern subgroup.  If a group was never matched or does
217    *         not exist, returns -1.
218    */
beginOffset(int group)219   public int beginOffset(int group){
220     int begin, end;//, length;
221     if(group < _beginGroupOffset.length){
222       begin  = _beginGroupOffset[group];
223       end    = _endGroupOffset[group];
224       //length = _match.length();
225       if(begin >= 0 && end >= 0)// && begin < length && end <= length)
226 	//return _matchBeginOffset + _beginGroupOffset[group];
227 	return _matchBeginOffset + begin;
228     }
229     return -1;
230   }
231 
232   /**
233    * Returns an offset marking the end of the pattern match
234    * relative to the beginning of the input.
235    * <p>
236    * @param group The pattern subgroup.
237    * @return Returns one plus the offset of the last token in
238    *         the indicated pattern subgroup.  If a group was never matched
239    *         or does not exist, returns -1.  A group matching the null
240    *         string will return its start offset.
241    */
endOffset(int group)242   public int endOffset(int group){
243     int begin, end;//, length;
244     if(group < _endGroupOffset.length){
245       begin  = _beginGroupOffset[group];
246       end    = _endGroupOffset[group];
247       //length = _match.length();
248       if(begin >= 0 && end >= 0)// && begin < length && end <= length)
249 	//return _matchBeginOffset + _endGroupOffset[group];
250 	return _matchBeginOffset + end;
251     }
252     return -1;
253   }
254 
255 
256   /**
257    * The same as group(0).
258    *
259    * @return A string containing the entire match.
260    */
toString()261   public String toString() {
262     return group(0);
263   }
264 }
265