1 // 2 // This software is now distributed according to 3 // the Lesser Gnu Public License. Please see 4 // http://www.gnu.org/copyleft/lesser.txt for 5 // the details. 6 // -- Happy Computing! 7 // 8 package com.stevesoft.pat; 9 10 import java.util.Enumeration; 11 import java.util.Vector; 12 13 /** 14 Shareware: package pat 15 <a href="copyright.html">Copyright 2001, Steven R. Brandt</a> 16 */ 17 /** 18 * The RegexTokenizer is similar to the StringTokenizer class provided with 19 * java, but allows one to tokenize using regular expressions, rather than a 20 * simple list of characters. Tokens are any strings between the supplied 21 * regular expression, as well as any backreferences (things in parenthesis) 22 * contained within the regular expression. 23 */ 24 public class RegexTokenizer implements Enumeration 25 { 26 String toParse; 27 28 Regex r; 29 30 int count = 0; 31 32 Vector v = new Vector(); 33 34 Vector vi = new Vector(); 35 36 int pos = 0; 37 38 int offset = 1; 39 getMore()40 void getMore() 41 { 42 String s = r.right(); 43 if (r.searchFrom(toParse, pos)) 44 { 45 v.addElement(r.left().substring(pos)); 46 vi.addElement(Integer.valueOf(r.matchFrom() + r.charsMatched())); 47 for (int i = 0; i < r.numSubs(); i++) 48 { 49 if (r.substring() != null) 50 { 51 v.addElement(r.substring(i + offset)); 52 vi.addElement(Integer.valueOf(r.matchFrom(i + offset) 53 + r.charsMatched(i + offset))); 54 } 55 } 56 pos = r.matchFrom() + r.charsMatched(); 57 } 58 else if (s != null) 59 { 60 v.addElement(s); 61 } 62 } 63 64 /** Initialize the tokenizer with a string of text and a pattern */ RegexTokenizer(String txt, String ptrn)65 public RegexTokenizer(String txt, String ptrn) 66 { 67 toParse = txt; 68 r = new Regex(ptrn); 69 offset = Regex.BackRefOffset; 70 getMore(); 71 } 72 73 /** Initialize the tokenizer with a Regex object. */ RegexTokenizer(String txt, Regex r)74 public RegexTokenizer(String txt, Regex r) 75 { 76 toParse = txt; 77 this.r = r; 78 offset = Regex.BackRefOffset; 79 getMore(); 80 } 81 82 /** 83 * This should always be cast to a String, as in StringTokenizer, and as in 84 * StringTokenizer one can do this by calling nextString(). 85 */ nextElement()86 public Object nextElement() 87 { 88 if (count >= v.size()) 89 { 90 getMore(); 91 } 92 return v.elementAt(count++); 93 } 94 95 /** This is the equivalent (String)nextElement(). */ nextToken()96 public String nextToken() 97 { 98 return (String) nextElement(); 99 } 100 101 /** 102 * This asks for the next token, and changes the pattern being used at the 103 * same time. 104 */ nextToken(String newpat)105 public String nextToken(String newpat) 106 { 107 try 108 { 109 r.compile(newpat); 110 } catch (RegSyntax r_) 111 { 112 } 113 return nextToken(r); 114 } 115 116 /** 117 * This asks for the next token, and changes the pattern being used at the 118 * same time. 119 */ nextToken(Regex nr)120 public String nextToken(Regex nr) 121 { 122 r = nr; 123 if (vi.size() > count) 124 { 125 pos = ((Integer) vi.elementAt(count)).intValue(); 126 v.setSize(count); 127 vi.setSize(count); 128 } 129 getMore(); 130 return nextToken(); 131 } 132 133 /** Tells whether there are more tokens in the pattern. */ hasMoreElements()134 public boolean hasMoreElements() 135 { 136 if (count >= v.size()) 137 { 138 getMore(); 139 } 140 return count < v.size(); 141 } 142 143 /** 144 * Tells whether there are more tokens in the pattern, but in the fashion of 145 * StringTokenizer. 146 */ hasMoreTokens()147 public boolean hasMoreTokens() 148 { 149 return hasMoreElements(); 150 } 151 152 /** Determines the # of remaining tokens */ countTokens()153 public int countTokens() 154 { 155 int _count = count; 156 while (hasMoreTokens()) 157 { 158 nextToken(); 159 } 160 count = _count; 161 return v.size() - count; 162 } 163 164 /** Returns all tokens in the String */ allTokens()165 public String[] allTokens() 166 { 167 countTokens(); 168 String[] ret = new String[v.size()]; 169 v.copyInto(ret); 170 return ret; 171 } 172 }; 173