/*
* $Id: Util.java,v 1.15 2003/11/07 20:16:25 dfs Exp $
*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
* must not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
* name, without prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
*
* A grep method is not included for two reasons: *
* Splits up a <code>String</code> instance and stores results in a
* <code>Collection</code> of substrings numbering no more than a specified
* limit. The string is split with a regular expression as the delimiter.
* The <b>limit</b> parameter essentially says to split the
* string only on at most the first <b>limit - 1</b> number of pattern
* occurrences.
* <p>
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split(list, "/([,-])/", "8-12,15,18", Util.SPLIT_ALL)</pre>
* produces the list containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following list would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use
* {@link org.apache.oro.text.perl.Perl5Util#split}.
*
* @param results A Collection to which the split results are appended.
* After the method returns, it contains the substrings of the input
* that occur between the regular expression delimiter occurrences.
* The input will not be split into any more substrings than the
* specified <code>limit</code>. A way of thinking of this is that
* only the first <code>limit - 1</code> matches of the delimiting
* regular expression will be used to split the input.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @param limit The limit on the number of resulting split elements.
* Values &lt;= 0 produce the same behavior as using the
* <b>SPLIT_ALL</b> constant which causes the limit to be
* ignored and splits to be performed on all occurrences of
* the pattern. You should use the <b>SPLIT_ALL</b> constant
* to achieve this behavior instead of relying on the default
* behavior associated with non-positive limit values.
* @since 2.0
*/
public static void split(Collection results, PatternMatcher matcher,
                         Pattern pattern, String input, int limit)
{
  // The matcher input wrapper remembers where the previous search stopped,
  // so each contains() call resumes after the last delimiter found.
  final PatternMatcherInput source = new PatternMatcherInput(input);

  // Start of the piece currently being collected.
  int pieceStart = 0;

  // The limit is decremented BEFORE each search: a limit of 1 therefore
  // performs no search at all, while a non-positive limit moves away from
  // zero and never stops the loop on its own.
  while (--limit != 0) {
    if (!matcher.contains(source, pattern)) {
      break;
    }

    final MatchResult delimiter = matcher.getMatch();

    // Emit the text lying between the previous delimiter and this one,
    // then continue right after the delimiter (group 0 = whole match).
    results.add(input.substring(pieceStart, delimiter.beginOffset(0)));
    pieceStart = delimiter.endOffset(0);
  }

  // Whatever follows the last consumed delimiter is the final piece.
  results.add(input.substring(pieceStart));
}
/**
* Splits up a <code>String</code> instance and stores results as a
* <code>Collection</code> of all its substrings using a regular expression
* as the delimiter.
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split(list, "/([,-])/", "8-12,15,18")</pre>
* produces the list containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following list would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use
* {@link org.apache.oro.text.perl.Perl5Util#split}.
* <p>
* This method is identical to calling:
* <pre>split(results, matcher, pattern, input, Util.SPLIT_ALL);</pre>
*
* @param results A <code>Collection</code> to which all the substrings of
* the input that occur between the regular expression delimiter
* occurrences are appended.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @since 2.0
*/
public static void split(Collection results, PatternMatcher matcher,
                         Pattern pattern, String input)
{
  // No cap on the number of pieces: hand the SPLIT_ALL sentinel to the
  // limit-taking overload, which does the actual work.
  final int unlimited = SPLIT_ALL;
  split(results, matcher, pattern, input, unlimited);
}
/**
* Splits up a <code>String</code> instance into strings contained in a
* <code>Vector</code> of size not greater than a specified limit. The
* string is split with a regular expression as the delimiter.
* The <b>limit</b> parameter essentially says to split the
* string only on at most the first <b>limit - 1</b> number of pattern
* occurrences.
* <p>
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split("/([,-])/", "8-12,15,18")</pre>
* produces the Vector containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following Vector would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use
* {@link org.apache.oro.text.perl.Perl5Util#split}.
*
* @deprecated Use
* {@link #split(Collection, PatternMatcher, Pattern, String, int)} instead.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @param limit The limit on the size of the returned <code>Vector</code>.
* Values &lt;= 0 produce the same behavior as using the
* <b>SPLIT_ALL</b> constant which causes the limit to be
* ignored and splits to be performed on all occurrences of
* the pattern. You should use the <b>SPLIT_ALL</b> constant
* to achieve this behavior instead of relying on the default
* behavior associated with non-positive limit values.
* @return A <code>Vector</code> containing the substrings of the input
* that occur between the regular expression delimiter occurrences.
* The input will not be split into any more substrings than the
* specified <code>limit</code>. A way of thinking of this is that
* only the first <code>limit - 1</code> matches of the delimiting
* regular expression will be used to split the input.
* @since 1.0
*/
public static Vector split(PatternMatcher matcher, Pattern pattern,
                           String input, int limit)
{
  // Legacy Vector-returning form: allocate the container here (initial
  // capacity 20) and let the Collection-based overload fill it.
  final Vector pieces = new Vector(20);

  split(pieces, matcher, pattern, input, limit);

  return pieces;
}
/**
* Splits up a <code>String</code> instance into a <code>Vector</code>
* of all its substrings using a regular expression as the delimiter.
* This method is inspired by the Perl split() function and behaves
* identically to it when used in conjunction with the Perl5Matcher and
* Perl5Pattern classes except for the following difference:
* <p>
* In Perl, if the split expression contains parentheses, the split()
* method creates additional list elements from each of the matching
* subgroups in the pattern. In other words:
* <pre>split("/([,-])/", "8-12,15,18")</pre>
* produces the Vector containing:
* <pre>{ "8", "-", "12", ",", "15", ",", "18" }</pre>
* The OROMatcher split method does not follow this behavior. The
* following Vector would be produced by OROMatcher:
* <pre>{ "8", "12", "15", "18" }</pre>
* To obtain the Perl behavior, use
* {@link org.apache.oro.text.perl.Perl5Util#split}.
* <p>
* This method is identical to calling:
* <pre>split(matcher, pattern, input, Util.SPLIT_ALL);</pre>
*
* @deprecated Use
* {@link #split(Collection, PatternMatcher, Pattern, String)} instead.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
* @param input The <code>String</code> to split.
* @return A <code>Vector</code> containing all the substrings of the input
* that occur between the regular expression delimiter occurrences.
* @since 1.0
*/
public static Vector split(PatternMatcher matcher, Pattern pattern,
                           String input)
{
  // Unlimited variant of the legacy Vector form: delegate with SPLIT_ALL
  // so that every occurrence of the pattern is used as a delimiter.
  final Vector pieces = split(matcher, pattern, input, SPLIT_ALL);
  return pieces;
}
/**
* Searches a string for a pattern and replaces the first occurrences
* of the pattern with a Substitution up to the number of
* substitutions specified by the numSubs parameter. A
* numSubs value of SUBSTITUTE_ALL will cause all occurrences
* of the pattern to be replaced.
*
* @param matcher The regular expression matcher to execute the pattern
* search.
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurrences.
* @param input The <code>String</code> on which to perform substitutions.
* @param numSubs The number of substitutions to perform. Only the
* first <b>numSubs</b> patterns encountered are
* substituted. If you want to substitute all occurrences
* set this parameter to <b>SUBSTITUTE_ALL</b>.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
* @since 1.0
*/
public static String substitute(PatternMatcher matcher, Pattern pattern,
                                Substitution sub, String input, int numSubs)
{
  // Scratch buffer sized to the input, plus the matcher-side view of it.
  final StringBuffer output = new StringBuffer(input.length());
  final PatternMatcherInput source = new PatternMatcherInput(input);

  // The buffer-based overload reports how many replacements it performed.
  final int replaced =
    substitute(output, matcher, pattern, sub, source, numSubs);

  // Callers expect the very same String instance back (not a copy) when
  // nothing was replaced, so only materialize the buffer otherwise.
  return (replaced == 0) ? input : output.toString();
}
/**
* Searches a string for a pattern and substitutes only the first
* occurrence of the pattern.
* <p>
* This method is identical to calling:
* <pre>substitute(matcher, pattern, sub, input, 1);</pre>
*
* @param matcher The regular expression matcher to execute the pattern
* search.
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurrences.
* @param input The <code>String</code> on which to perform substitutions.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
* @since 1.0
*/
public static String substitute(PatternMatcher matcher, Pattern pattern,
                                Substitution sub, String input)
{
  // Replace at most one occurrence: delegate with a substitution count of 1.
  final int firstOnly = 1;
  return substitute(matcher, pattern, sub, input, firstOnly);
}
/**
* Searches a string for a pattern and replaces the first occurrences
* of the pattern with a Substitution up to the number of
* substitutions specified by the numSubs parameter. A
* numSubs value of SUBSTITUTE_ALL will cause all occurrences
* of the pattern to be replaced. The number of substitutions made
* is returned.
*
* @param result The StringBuffer in which to store the result of the * substitutions. The buffer is only appended to. * @param matcher The regular expression matcher to execute the pattern * search. * @param pattern The regular expression to search for and substitute * occurrences of. * @param sub The Substitution used to substitute pattern occurences. * @param input The input on which to perform substitutions. * @param numSubs The number of substitutions to perform. Only the * first numSubs patterns encountered are * substituted. If you want to substitute all occurences * set this parameter to SUBSTITUTE_ALL . * @return The number of substitutions made. * @since 2.0.6 */ public static int substitute(StringBuffer result, PatternMatcher matcher, Pattern pattern, Substitution sub, String input, int numSubs) { PatternMatcherInput pinput = new PatternMatcherInput(input); return substitute(result, matcher, pattern, sub, pinput, numSubs); } /** * Searches a string for a pattern and replaces the first occurrences * of the pattern with a Substitution up to the number of * substitutions specified by the numSubs parameter. A * numSubs value of SUBSTITUTE_ALL will cause all occurrences * of the pattern to be replaced. The number of substitutions made * is returned. *
* @param result The StringBuffer in which to store the result of the * substitutions. The buffer is only appended to. * @param matcher The regular expression matcher to execute the pattern * search. * @param pattern The regular expression to search for and substitute * occurrences of. * @param sub The Substitution used to substitute pattern occurences. * @param input The input on which to perform substitutions. * @param numSubs The number of substitutions to perform. Only the * first numSubs patterns encountered are * substituted. If you want to substitute all occurences * set this parameter to SUBSTITUTE_ALL . * @return The number of substitutions made. * @since 2.0.3 */ public static int substitute(StringBuffer result, PatternMatcher matcher, Pattern pattern, Substitution sub, PatternMatcherInput input, int numSubs) { int beginOffset, subCount; char[] inputBuffer; subCount = 0; beginOffset = input.getBeginOffset(); inputBuffer = input.getBuffer(); // Must be != 0 because SUBSTITUTE_ALL is represented by -1. // Do NOT change to numSubs > 0. while(numSubs != 0 && matcher.contains(input, pattern)) { --numSubs; ++subCount; result.append(inputBuffer, beginOffset, input.getMatchBeginOffset() - beginOffset); sub.appendSubstitution(result, matcher.getMatch(), subCount, input, matcher, pattern); beginOffset = input.getMatchEndOffset(); } result.append(inputBuffer, beginOffset, input.length() - beginOffset); return subCount; } }