1 //
2 // This software is now distributed according to
3 // the Lesser Gnu Public License.  Please see
4 // http://www.gnu.org/copyleft/lesser.txt for
5 // the details.
6 //    -- Happy Computing!
7 //
8 package com.stevesoft.pat;
9 
10 import java.util.Enumeration;
11 import java.util.Vector;
12 
13 /**
14  Shareware: package pat
15  <a href="copyright.html">Copyright 2001, Steven R. Brandt</a>
16  */
17 /**
18  * The RegexTokenizer is similar to the StringTokenizer class provided with
19  * java, but allows one to tokenize using regular expressions, rather than a
20  * simple list of characters. Tokens are any strings between the supplied
21  * regular expression, as well as any backreferences (things in parenthesis)
22  * contained within the regular expression.
23  */
24 public class RegexTokenizer implements Enumeration
25 {
26   String toParse;
27 
28   Regex r;
29 
30   int count = 0;
31 
32   Vector v = new Vector();
33 
34   Vector vi = new Vector();
35 
36   int pos = 0;
37 
38   int offset = 1;
39 
getMore()40   void getMore()
41   {
42     String s = r.right();
43     if (r.searchFrom(toParse, pos))
44     {
45       v.addElement(r.left().substring(pos));
46       vi.addElement(Integer.valueOf(r.matchFrom() + r.charsMatched()));
47       for (int i = 0; i < r.numSubs(); i++)
48       {
49         if (r.substring() != null)
50         {
51           v.addElement(r.substring(i + offset));
52           vi.addElement(Integer.valueOf(r.matchFrom(i + offset)
53                   + r.charsMatched(i + offset)));
54         }
55       }
56       pos = r.matchFrom() + r.charsMatched();
57     }
58     else if (s != null)
59     {
60       v.addElement(s);
61     }
62   }
63 
64   /** Initialize the tokenizer with a string of text and a pattern */
RegexTokenizer(String txt, String ptrn)65   public RegexTokenizer(String txt, String ptrn)
66   {
67     toParse = txt;
68     r = new Regex(ptrn);
69     offset = Regex.BackRefOffset;
70     getMore();
71   }
72 
73   /** Initialize the tokenizer with a Regex object. */
RegexTokenizer(String txt, Regex r)74   public RegexTokenizer(String txt, Regex r)
75   {
76     toParse = txt;
77     this.r = r;
78     offset = Regex.BackRefOffset;
79     getMore();
80   }
81 
82   /**
83    * This should always be cast to a String, as in StringTokenizer, and as in
84    * StringTokenizer one can do this by calling nextString().
85    */
nextElement()86   public Object nextElement()
87   {
88     if (count >= v.size())
89     {
90       getMore();
91     }
92     return v.elementAt(count++);
93   }
94 
95   /** This is the equivalent (String)nextElement(). */
nextToken()96   public String nextToken()
97   {
98     return (String) nextElement();
99   }
100 
101   /**
102    * This asks for the next token, and changes the pattern being used at the
103    * same time.
104    */
nextToken(String newpat)105   public String nextToken(String newpat)
106   {
107     try
108     {
109       r.compile(newpat);
110     } catch (RegSyntax r_)
111     {
112     }
113     return nextToken(r);
114   }
115 
116   /**
117    * This asks for the next token, and changes the pattern being used at the
118    * same time.
119    */
nextToken(Regex nr)120   public String nextToken(Regex nr)
121   {
122     r = nr;
123     if (vi.size() > count)
124     {
125       pos = ((Integer) vi.elementAt(count)).intValue();
126       v.setSize(count);
127       vi.setSize(count);
128     }
129     getMore();
130     return nextToken();
131   }
132 
133   /** Tells whether there are more tokens in the pattern. */
hasMoreElements()134   public boolean hasMoreElements()
135   {
136     if (count >= v.size())
137     {
138       getMore();
139     }
140     return count < v.size();
141   }
142 
143   /**
144    * Tells whether there are more tokens in the pattern, but in the fashion of
145    * StringTokenizer.
146    */
hasMoreTokens()147   public boolean hasMoreTokens()
148   {
149     return hasMoreElements();
150   }
151 
152   /** Determines the # of remaining tokens */
countTokens()153   public int countTokens()
154   {
155     int _count = count;
156     while (hasMoreTokens())
157     {
158       nextToken();
159     }
160     count = _count;
161     return v.size() - count;
162   }
163 
164   /** Returns all tokens in the String */
allTokens()165   public String[] allTokens()
166   {
167     countTokens();
168     String[] ret = new String[v.size()];
169     v.copyInto(ret);
170     return ret;
171   }
172 };
173