1 /* AbiWord
2  * Copyright (C) 2003 Tomas Frydrych <tomas@frydrych.uklinux.net>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301 USA.
18  */
19 
20 #ifndef UT_ITERATOR_H
21 #define UT_ITERATOR_H
22 
23 //////////////////////////////////////////////////////////////////////////////
24 //
25 //  UT_TextIterator class is an abstraction of a text iterator, making
26 //  it possible to iterate sequentially over textual data without
27 //  having to know anything about how that data might be stored.
28 //
//  This class is pure virtual; its sole purpose is to define a generic
//  interface so that we can pass a generic type into and out of
//  functions. For an example of an implementation see pd_Iterator.h/cpp
32 //
//  Notes on implementation
//  -----------------------
//  Any derived classes should implement the individual functions to
//  conform to the behaviour outlined in the comments in the class
//  definition below.
38 //
//  In addition, the actual iterator implementations should provide a
//  mechanism for restricting the upper and lower bounds (either at
//  construction or subsequently), so that when passing iterators into
//  functions it is not necessary to pass a length parameter with
//  them. For example, PD_StruxIterator can provide access to the
//  entire document from the start of the strux onwards; we might want
//  to restrict this to the part that only belongs to a particular
//  TextRun, etc.
47 //
48 //  Notes on use
49 //  ------------
50 //  When passing iterators into functions, the iterator should be set
51 //  at the position where processing is to start, i.e., the user is
52 //  not expected to reposition the iterator before commencing
53 //  processing. Also, the upper boundary should be restricted
54 //  appropriately to indicate where the processing is to stop; this is
55 //  preferable to passing an extra length parameter.
56 //
57 //  Tomas, November, 2003
58 //
59 
//////////////////////////////////////////////////////////////////////
// the following are values that the getChar() function can fall back
// on when things are not entirely right ...
//
//    UT_IT_NOT_CHARACTER: when at the current position we have
//                         something other than text (image, etc)
//
//    UT_IT_ERROR: when things are really not going as they should
//                 NB: this is just to have something to fall back on,
//                 not an error reporting mechanism; for that see
//                 getStatus() below
//
// NOTE(review): UCS_SPACE is not defined in this header; presumably it
// is provided by ut_types.h (included further down) -- confirm. Being
// macros, these expand at the point of use, so the definitions may
// legitimately precede that include.
#define UT_IT_NOT_CHARACTER UCS_SPACE
#define UT_IT_ERROR 0xffffffff
73 
74 #include "ut_types.h"
75 
76 class PD_Document;
77 class pt_PieceTable;
78 class pf_Frag;
79 
80 /////////////////////////////////////////////////////////////
81 //
82 // The following enum defines possible iterator states:
83 //
84 //     OK: need I say more?
85 //
86 //     OutOfBounds: last positioning operation took the iterator
87 //                  out of bounds; this error state is recoverable
88 //                  by using the indexing operator [], or calling
89 //                  setPosition() but the use of relative increment
90 //                  operators (++, --, +=, -=) in this state will
91 //                  lead to undefined results.
92 //
93 //     Error: any other error; this state is irrecoverable, clean up
94 //            and go home
95 //
enum UTIterStatus
{
	// everything is fine
	UTIter_OK          = 0,

	// last positioning operation left the valid range; recoverable
	// by absolute repositioning (operator [] or setPosition())
	UTIter_OutOfBounds = 1,

	// any other failure; not recoverable
	UTIter_Error       = 2
};
102 
103 
104 class ABI_EXPORT UT_TextIterator
105 {
106   public:
~UT_TextIterator()107 	virtual ~UT_TextIterator() {}
108 
109 	/////////////////////////////////////////////////////////////////////////
110 	// data accessor; retrieves character at present position
111 	//
112 	// NB: I.getChar() is functionally equivalent to I[getPosition()]
113 	//
114 	virtual UT_UCS4Char getChar() = 0;
115 
116 	/////////////////////////////////////////////////////////////////////////
117 	// positon accessor; returns a value representing current postion
118 	//
119 	// NB: The position can be expressed in an arbitrary coordinate
120 	// system, typically one that makes sense to the actual
121 	// implementation; when an iterator is passed into a function, the
122 	// starting position might not be 0.
123 	//
124 	virtual UT_uint32   getPosition() const = 0;
125 
126 	////////////////////////////////////////////////////////////////////
127 	// moves iterator to position pos
128 	//
129 	virtual void setPosition(UT_uint32 pos) = 0;
130 
131 	///////////////////////////////////////////////////////////////////
132 	// set and retrieve upper bounds
133 	//
134 	virtual void      setUpperLimit(UT_uint32 maxpos) = 0;
135 	virtual UT_uint32 getUpperLimit() const = 0;
136 
137 	///////////////////////////////////////////////////////////////////
138 	// returns the current state of the iterator (see definition of
139 	// UTIterStatus above)
140 	//
141 	virtual UTIterStatus getStatus() const = 0;
142 
143 	///////////////////////////////////////////////////////////////////
144 	// finds first occurence of given string, looking in direction
145 	// indicated by bForward
146 	// failure is indicated through getStatus() == UTIter_OutOfBounds;
147 	//
148 	virtual UT_uint32 find(UT_UCS4Char * what, UT_uint32 iLen, bool bForward = true) = 0;
149 	virtual UT_uint32 find(UT_TextIterator & text, UT_uint32 iLen, bool bForward = true) = 0;
150 
151 	///////////////////////////////////////////////////////////////////
152 	// makes a copy of the iterator in its present state
153 	//
154 	virtual UT_TextIterator * makeCopy() const = 0;
155 
156 	///////////////////////////////////////////////////////////////////
157 	// increment operators
158 	//
159 	// NB: We intentionally define prefix operators only, as post-fix
160 	// versions provide no real advantage, and are less efficient
161 	//
162 	virtual UT_TextIterator & operator ++ () = 0;
163 	virtual UT_TextIterator & operator -- () = 0;
164 	virtual UT_TextIterator & operator += (UT_sint32 i) = 0;
165 	virtual UT_TextIterator & operator -= (UT_sint32 i) = 0;
166 
167 	////////////////////////////////////////////////////////////////////
168 	// subscript operator []; repostions iterator and returns
169 	// character at new postion
170 	//
171 	// NB(1): the operator physically advances the iterator to positon
172 	// pos before returning, i.e.,
173 	//
174 	//     UT_UCS4Char c = I[p];
175 	//
176 	// and
177 	//
178 	//     I.setPosition(p);
179 	//     UT_UCS4Char c = I.getChar();
180 	//
181 	// are exactly equivalent, leaving the iterator in the same state
182 	//
183 	// NB(2): if passed iterator as an argumenent in a function, you
184 	// need to know the initial position to use this operator for
185 	// processing which is relative to the state of iterator when
186 	// passed to you, i.e., f1() and f2() below do exactly the same
187 	// thing, f3() does not.
188 	//
189 	// function f1(UT_TextIterator & I, UT_uint32 len)
190 	// {
191 	//    UT_uint32 pos = I.getPosition();
192 	//
193 	//    for(UT_uint32 i = pos; i < len + pos; i++)
194 	//    {
195 	//       UT_UCS4Char c = text[i];
196 	//       // do something with c ...
197 	//    }
198 	// }
199 	//
200 	// function f2(UT_TextIterator & I, UT_uint32 len)
201 	// {
202 	//    for(UT_uint32 i = 0; i < len; ++i, ++I)
203 	//    {
204 	//       UT_UCS4Char c = text.getChar();
205 	//       // do something with c ...
206 	//    }
207 	// }
208 	//
209 	// In contrast, f3() will start at the leftmost edge of the
210 	// theoretical iterator range, which is probably not what you
211 	// want; the actual implementation of the iterator can if fact
212 	// restrict valid range of the subscript to an arbitrary range
213 	// (i.e., I[0] may produce OutOfBounds state).
214 	//
215 	// function f3(UT_TextIterator & I, UT_uint32 len)
216 	// {
217 	//    for(UT_uint32 i = 0; i < len; i++)
218 	//    {
219 	//       UT_UCS4Char c = text[i];
220 	//       // do something with c ...
221 	//    }
222 	// }
223 	//
224 	// Bottom Line: unless told otherwise, assume that
225 	// processing is to start from I.getPosition(), not 0.
226 	//
227 	virtual UT_UCS4Char   operator [](UT_uint32 pos) = 0;
228 
229 };
230 
231 
232 #endif //UT_ITERATOR_H
233