1 // Copyright (c) 1996 James Clark
2 // See the file COPYING for copying permission.
3
4 #ifndef Node_INCLUDED
5 #define Node_INCLUDED 1
6 #ifdef __GNUG__
7 #pragma interface
8 #endif
9
10 #include <stddef.h>
11
// GROVE_API decorates every class exported from this header.
// With SP_USE_DLL defined it expands to SP_DLLEXPORT when
// BUILD_LIBGROVE is also defined and to SP_DLLIMPORT otherwise;
// without SP_USE_DLL it expands to nothing.
#if defined(SP_USE_DLL)
#  if defined(BUILD_LIBGROVE)
#    define GROVE_API SP_DLLEXPORT
#  else
#    define GROVE_API SP_DLLIMPORT
#  endif
#else /* not SP_USE_DLL */
#  define GROVE_API /* as nothing */
#endif /* not SP_USE_DLL */
21
// GROVE_NAMESPACE_SCOPE is the qualifier to put in front of a grove
// name: "GROVE_NAMESPACE::" when GROVE_NAMESPACE is defined, and
// nothing at all when it is not.
#ifndef GROVE_NAMESPACE
#  define GROVE_NAMESPACE_SCOPE
#else
#  define GROVE_NAMESPACE_SCOPE GROVE_NAMESPACE::
#endif
27
// Supports the following modules:
// baseabs prlgabs0 instabs basesds0 instsds0 subdcabs
30
31 #ifdef GROVE_NAMESPACE
32 namespace GROVE_NAMESPACE {
33 #endif
34
// GroveChar is the character type used for all grove strings.
//   no SP_MULTI_BYTE                    -> unsigned char
//   SP_MULTI_BYTE and SP_WCHAR_T_USHORT -> wchar_t
//   SP_MULTI_BYTE only                  -> unsigned short
#if !defined(SP_MULTI_BYTE)
typedef unsigned char GroveChar;
#elif defined(SP_WCHAR_T_USHORT)
typedef wchar_t GroveChar;
#else
typedef unsigned short GroveChar;
#endif
44
45
// Forward declarations of types that the interfaces below mention
// before their full definitions appear.

// Reference-holding handle classes.
class NodePtr;
class NodeListPtr;
class NamedNodeListPtr;

// Value and helper classes.
class GroveString;
class NodeVisitor;
class SdataMapper;
52
// Outcome of asking a node (or node list) for a property value.
enum AccessResult {
  // success
  accessOK,
  // value is null
  accessNull,
  // timed out waiting for property
  accessTimeout,
  // property is not defined for class
  accessNotInClass
};
59
60 struct GROVE_API ComponentName {
61 enum Id {
62 noId = -1,
63 idAllPropertyNames,
64 idApplicationInfo,
65 idAttributeAssignment,
66 idAttributes,
67 idAttributeValueToken,
68 idCdata,
69 idChar,
70 idChildrenPropertyName,
71 idClassName,
72 idContent,
73 idDataChar,
74 idDataPropertyName,
75 idDataSepPropertyName,
76 idDefaulted,
77 idDefaultedEntities,
78 idDocumentElement,
79 idDocumentType,
80 idDoctypesAndLinktypes,
81 idElement,
82 idElements,
83 idEntities,
84 idEntity,
85 idEntityName,
86 idEntityType,
87 idEpilog,
88 idExternalData,
89 idExternalId,
90 idGeneralEntities,
91 idGeneratedSystemId,
92 idGi,
93 idGoverningDoctype,
94 idGoverning,
95 idGroveRoot,
96 idId,
97 idImplied,
98 idIncluded,
99 idMustOmitEndTag,
100 idName,
101 idNdata,
102 idNotation,
103 idNotationName,
104 idNotations,
105 idOrigin,
106 idOriginToSubnodeRelPropertyName,
107 idParent,
108 idPi,
109 idProlog,
110 idPublicId,
111 idReferent,
112 idSdata,
113 idSgmlConstants,
114 idSgmlDocument,
115 idSubdocument,
116 idSubnodePropertyNames,
117 idSystemData,
118 idSystemId,
119 idText,
120 idToken,
121 idTokenSep,
122 idTreeRoot,
123 idValue
124 };
125 enum { nIds = idValue + 1 };
126 static const char *rcsName(Id);
127 static const char *sdqlName(Id);
128 };
129
130 struct GROVE_API ClassDef {
131 ComponentName::Id className;
132 const ComponentName::Id *allPropertyNames;
133 const ComponentName::Id *subnodePropertyNames;
134 ComponentName::Id childrenPropertyName;
135 ComponentName::Id dataPropertyName;
136 ComponentName::Id dataSepPropertyName;
137
138 static const ClassDef sgmlDocument;
139 static const ClassDef sgmlConstants;
140 static const ClassDef dataChar;
141 static const ClassDef element;
142 static const ClassDef attributeAssignment;
143 static const ClassDef attributeValueToken;
144 static const ClassDef pi;
145 static const ClassDef sdata;
146 static const ClassDef documentType;
147 static const ClassDef entity;
148 static const ClassDef notation;
149 static const ClassDef externalId;
150 static const ClassDef externalData;
151 static const ClassDef subdocument;
152 static const ClassDef nonSgml;
153 static const ClassDef message;
154 };
155
156 class PropertyValue;
157
158 class GROVE_API Node {
159 public:
160 // property values
161 // data in GroveString valid till Node destroyed
162 // default is accessNotInClass
163 // Intrinsic properties.
164 virtual AccessResult getOrigin(NodePtr &) const;
165 virtual AccessResult getParent(NodePtr &) const;
166 virtual AccessResult getGroveRoot(NodePtr &) const;
167 virtual AccessResult getTreeRoot(NodePtr &) const;
168 virtual AccessResult getOriginToSubnodeRelPropertyName(ComponentName::Id &) const = 0;
169 AccessResult getClassName(ComponentName::Id &) const;
170 AccessResult getChildrenPropertyName(ComponentName::Id &) const;
171 AccessResult getDataPropertyName(ComponentName::Id &) const;
172 AccessResult getDataSepPropertyName(ComponentName::Id &) const;
173 AccessResult getSubnodePropertyNames(const ComponentName::Id *&) const;
174 AccessResult getAllPropertyNames(const ComponentName::Id *&) const;
175
176 // this allows you to apply some operation to a node
177 // according to its grove class
178 virtual void accept(NodeVisitor &) = 0;
179 virtual const ClassDef &classDef() const = 0;
180 // not formally properties
181 virtual AccessResult children(NodeListPtr &) const = 0;
182 virtual AccessResult follow(NodeListPtr &) const = 0;
183 // return accessNull if there isn't a first or next
184 // result accessNotInClass if datatype of otsnr is not node-list or named-node-list
185 virtual AccessResult nextSibling(NodePtr &) const;
186 // works the same as nextSibling(), except that when charChunk()
187 // returns accessOK, returns node following that chunk.
188 virtual AccessResult nextChunkSibling(NodePtr &) const;
189 // if result == accessOK, length must be > 0
190 virtual AccessResult nextChunkAfter(NodePtr &) const;
191 virtual AccessResult charChunk(const SdataMapper &, GroveString &) const;
192 // return accessNotInClass if class doesn't have children property
193 // return accessNull if there isn't a first child
194 virtual AccessResult firstChild(NodePtr &) const;
195 // First of this node's siblings.
196 // accessNotInClass if datatype of otsnr is not node-list or named-node-list
197 virtual AccessResult firstSibling(NodePtr &) const;
198 // The index of this node in the list of all its siblings.
199 virtual AccessResult siblingsIndex(unsigned long &) const;
200 // Has a default implementation in terms of getAttributes and NodeList::ref
201 virtual AccessResult attributeRef(unsigned long, NodePtr &) const;
202 // references the list of the following siblings
203 // 0 is the next sibling
204 // Has a default implementation in terms of nextSibling.
205 virtual AccessResult followSiblingRef(unsigned long, NodePtr &) const;
206 // For a tokenized attribute returns tokens separated by spaces;
207 // null for a non-tokenized attribute.
208 virtual AccessResult tokens(GroveString &) const;
209
210 // For an element, the number of elements started before it,
211 // that is its zero-based index in a pre-order traversal of
212 // the all the elements in the document.
213 virtual AccessResult elementIndex(unsigned long &) const;
214 // Node identity.
215 // if hash() returns different values for two nodes,
216 // the operator==() must return false for those two nodes.
217 virtual unsigned long hash() const;
218 // Implementation will usually need to call sameGrove().
219 virtual bool operator==(const Node &node) const = 0;
220 bool operator!=(const Node &node) const { return !(*this == node); }
221 // Does this chunk contains nd?
222 virtual bool chunkContains(const Node &nd) const;
223 bool sameGrove(const Node &node) const;
224 typedef const char *IID;
225 virtual bool queryInterface(IID, const void *&) const;
226 // Property on SGML document giving list of parser messages.
227 virtual AccessResult getMessages(NodeListPtr &) const;
228 // Property of message.
229 enum Severity { info, warning, error };
230 virtual AccessResult getSeverity(Severity &) const;
231 AccessResult property(ComponentName::Id, const SdataMapper &, PropertyValue &) const;
232 virtual unsigned groveIndex() const = 0;
233 public:
234 virtual void addRef() = 0;
235 // You must call release rather than use delete.
236 // This is done automatically by NodePtr.
237 virtual void release() = 0;
238 protected:
239 // This enforces this.
240 #ifdef __GNUG__
241 virtual
242 #endif
~Node()243 ~Node() { }
244 public:
245 // This is special.
246 // Implemented in terms of charChunk().
247 AccessResult getChar(const SdataMapper &, GroveChar &) const;
248 // From here on derived algorithmically from property set.
249 // Properties common to several node classes.
250 virtual AccessResult getAttributes(NamedNodeListPtr &) const;
251 virtual AccessResult getName(GroveString &) const;
252 virtual AccessResult getSystemData(GroveString &) const;
253 virtual AccessResult getEntity(NodePtr &) const;
254 virtual AccessResult getEntityName(GroveString &) const;
255 virtual AccessResult getExternalId(NodePtr &) const;
256 virtual AccessResult getNotation(NodePtr &) const;
257 // Properties only on entity
258 virtual AccessResult getText(GroveString &) const;
259 virtual AccessResult getNotationName(GroveString &) const;
260 enum EntityType { text, cdata, sdata, ndata, subdocument, pi };
261 virtual AccessResult getEntityType(EntityType &) const;
262 virtual AccessResult getDefaulted(bool &) const;
263 // Properties only on externalId
264 virtual AccessResult getPublicId(GroveString &) const;
265 virtual AccessResult getSystemId(GroveString &) const;
266 virtual AccessResult getGeneratedSystemId(GroveString &) const;
267 // Properties only on attributeAssignment.
268 virtual AccessResult getValue(NodeListPtr &) const;
269 virtual AccessResult getTokenSep(GroveChar &) const;
270 virtual AccessResult getImplied(bool &) const;
271 // Properties only on element.
272 virtual AccessResult getGi(GroveString &) const;
273 virtual bool hasGi(GroveString) const;
274 virtual AccessResult getId(GroveString &) const;
275 virtual AccessResult getContent(NodeListPtr &) const;
276 virtual AccessResult getIncluded(bool &) const;
277 virtual AccessResult getMustOmitEndTag(bool &) const;
278 // Properties only on attributeValueToken.
279 virtual AccessResult getToken(GroveString &) const;
280 virtual AccessResult getReferent(NodePtr &) const;
281 // Properties only on doctype
282 virtual AccessResult getGoverning(bool &) const;
283 virtual AccessResult getGeneralEntities(NamedNodeListPtr &) const;
284 virtual AccessResult getNotations(NamedNodeListPtr &) const;
285 // Properties only on sgmlDocument.
286 virtual AccessResult getSgmlConstants(NodePtr &) const;
287 virtual AccessResult getApplicationInfo(GroveString &) const;
288 virtual AccessResult getProlog(NodeListPtr &) const;
289 virtual AccessResult getEpilog(NodeListPtr &) const;
290 virtual AccessResult getDocumentElement(NodePtr &) const;
291 virtual AccessResult getElements(NamedNodeListPtr &) const;
292 virtual AccessResult getEntities(NamedNodeListPtr &) const;
293 virtual AccessResult getDefaultedEntities(NamedNodeListPtr &) const;
294 virtual AccessResult getGoverningDoctype(NodePtr &) const;
295 virtual AccessResult getDoctypesAndLinktypes(NamedNodeListPtr &) const;
296 // Properties only on dataChar.
297 // For a non-SGML data character (resulting from a numeric character reference).
298 // Something like this is being added in the HyTime TC.
299 virtual AccessResult getNonSgml(unsigned long &) const;
300 };
301
302 class GROVE_API NodeList {
303 public:
304 virtual AccessResult first(NodePtr &) const = 0;
305 virtual AccessResult rest(NodeListPtr &) const = 0;
306 virtual AccessResult chunkRest(NodeListPtr &) const = 0;
307 // i is a zero based index
308 // This has a default implementation in terms of first and rest.
309 virtual AccessResult ref(unsigned long i, NodePtr &) const;
310 virtual void release() = 0;
311 virtual void addRef() = 0;
312 protected:
313 #ifdef __GNUG__
314 virtual
315 #endif
~NodeList()316 ~NodeList() { }
317 };
318
319 class GROVE_API NamedNodeList {
320 public:
321 // This must NOT assume that the string has been normalized.
322 virtual AccessResult namedNode(GroveString, NodePtr &) const = 0;
323 // Do name normalize appropriate for this NamedNodeList.
324 // Returns new size (always <= old size).
325 // This can be used even if list is empty
326 virtual size_t normalize(GroveChar *, size_t) const = 0;
327 // Could have used subtyping here, but accessing NamedNodeList
328 // positionally typically requires different data structure.
329 virtual NodeListPtr nodeList() const = 0;
330 // Use this when you don't care about the order.
331 // May be much more efficient than nodeList().
332 virtual NodeListPtr nodeListNoOrder() const;
333 enum Type {
334 elements,
335 attributes,
336 entities,
337 notations,
338 doctypesAndLinktypes
339 };
340 virtual Type type() const = 0;
341 // If the node is of a class that occurs in the list,
342 // return the value of the property that serves as the name
343 // property for nodes of that class in the named node list.
344 // Return accessNotInClass if the node is not of a class
345 // that occurs in the list.
346 AccessResult nodeName(const NodePtr &, GroveString &) const;
347 virtual void release() = 0;
348 virtual void addRef() = 0;
349 protected:
350 #ifdef __GNUG__
351 virtual
352 #endif
~NamedNodeList()353 ~NamedNodeList() { }
354 };
355
356 class GROVE_API NodePtr {
357 public:
NodePtr()358 NodePtr() : node_(0) { }
NodePtr(Node * node)359 NodePtr(Node *node) : node_(node) { addRef(); }
~NodePtr()360 ~NodePtr() { release(); }
NodePtr(const NodePtr & ptr)361 NodePtr(const NodePtr &ptr) : node_(ptr.node_) { addRef(); }
362 NodePtr &operator=(const NodePtr &ptr) {
363 ptr.addRef();
364 release();
365 node_ = ptr.node_;
366 return *this;
367 }
368 Node *operator->() const { return node_; }
369 Node &operator*() const { return *node_; }
assignOrigin()370 AccessResult assignOrigin() { return node_->getOrigin(*this); }
assignFirstChild()371 AccessResult assignFirstChild() { return node_->firstChild(*this); }
assignNextSibling()372 AccessResult assignNextSibling() { return node_->nextSibling(*this); }
assignNextChunkSibling()373 AccessResult assignNextChunkSibling() {
374 return node_->nextChunkSibling(*this);
375 }
assignNextChunkAfter()376 AccessResult assignNextChunkAfter() {
377 return node_->nextChunkAfter(*this);
378 }
assignFirstSibling()379 AccessResult assignFirstSibling() { return node_->firstSibling(*this); }
assign(Node * node)380 void assign(Node *node) {
381 if (node)
382 node->addRef();
383 release();
384 node_ = node;
385 }
clear()386 void clear() { release(); node_ = 0; }
387 operator bool() const { return node_ != 0; }
388 private:
addRef()389 void addRef() const { if (node_) node_->addRef(); }
release()390 void release() const { if (node_) node_->release(); }
391 Node *node_;
392 };
393
394 class GROVE_API NodeListPtr {
395 public:
NodeListPtr()396 NodeListPtr() : list_(0) { }
NodeListPtr(NodeList * list)397 NodeListPtr(NodeList *list) : list_(list) { addRef(); }
~NodeListPtr()398 ~NodeListPtr() { release(); }
NodeListPtr(const NodeListPtr & ptr)399 NodeListPtr(const NodeListPtr &ptr) : list_(ptr.list_) { addRef(); }
400 NodeListPtr &operator=(const NodeListPtr &ptr) {
401 ptr.addRef();
402 release();
403 list_ = ptr.list_;
404 return *this;
405 }
assignRest()406 AccessResult assignRest() { return list_->rest(*this); }
assignChunkRest()407 AccessResult assignChunkRest() { return list_->chunkRest(*this); }
408 NodeList *operator->() const { return list_; }
409 NodeList &operator*() const { return *list_; }
assign(NodeList * list)410 void assign(NodeList *list) {
411 if (list)
412 list->addRef();
413 release();
414 list_ = list;
415 }
clear()416 void clear() { release(); list_ = 0; }
417 operator bool() const { return list_ != 0; }
418 private:
addRef()419 void addRef() const { if (list_) list_->addRef(); }
release()420 void release() const { if (list_) list_->release(); }
421 NodeList *list_;
422 };
423
424 class GROVE_API NamedNodeListPtr {
425 public:
NamedNodeListPtr()426 NamedNodeListPtr() : list_(0) { }
NamedNodeListPtr(NamedNodeList * list)427 NamedNodeListPtr(NamedNodeList *list) : list_(list) { addRef(); }
~NamedNodeListPtr()428 ~NamedNodeListPtr() { release(); }
NamedNodeListPtr(const NamedNodeListPtr & ptr)429 NamedNodeListPtr(const NamedNodeListPtr &ptr) : list_(ptr.list_) { addRef(); }
430 NamedNodeListPtr &operator=(const NamedNodeListPtr &ptr) {
431 ptr.addRef();
432 release();
433 list_ = ptr.list_;
434 return *this;
435 }
436 NamedNodeList *operator->() const { return list_; }
437 NamedNodeList &operator*() const { return *list_; }
assign(NamedNodeList * list)438 void assign(NamedNodeList *list) {
439 if (list)
440 list->addRef();
441 release();
442 list_ = list;
443 }
clear()444 void clear() { release(); list_ = 0; }
445 operator bool() const { return list_ != 0; }
446 private:
addRef()447 void addRef() const { if (list_) list_->addRef(); }
release()448 void release() const { if (list_) list_->release(); }
449 NamedNodeList *list_;
450 };
451
452 class GROVE_API GroveString {
453 public:
454 typedef const GroveChar *const_iterator;
GroveString()455 GroveString() : data_(0), size_(0) { }
GroveString(const GroveChar * data,size_t size)456 GroveString(const GroveChar *data, size_t size)
457 : data_(data), size_(size) { }
size()458 size_t size() const { return size_; }
data()459 const GroveChar *data() const { return data_; }
assign(const GroveChar * data,size_t size)460 void assign(const GroveChar *data, size_t size) {
461 data_ = data;
462 size_ = size;
463 }
464 bool operator==(const GroveString &str) const;
465 bool operator!=(const GroveString &str) const { return !(*this == str); }
466 GroveChar operator[](size_t i) const { return data_[i]; }
begin()467 const_iterator begin() const { return data_; }
end()468 const_iterator end() const { return data_ + size_; }
469 private:
470 const GroveChar *data_;
471 size_t size_;
472 };
473
474 class GROVE_API SdataMapper {
475 public:
476 virtual ~SdataMapper();
477 // Returns a pointer to a single character or null
478 virtual bool sdataMap(GroveString name, GroveString text, GroveChar &) const;
479 };
480
481 // See Design Patterns.
482
483 class GROVE_API NodeVisitor {
484 public:
485 virtual void sgmlDocument(Node &);
486 virtual void sgmlConstants(Node &);
487 virtual void dataChar(Node &);
488 virtual void element(Node &);
489 virtual void attributeAssignment(Node &);
490 virtual void attributeValueToken(Node &);
491 virtual void pi(Node &);
492 virtual void sdata(Node &);
493 virtual void documentType(Node &);
494 virtual void entity(Node &);
495 virtual void notation(Node &);
496 virtual void externalId(Node &);
497 virtual void externalData(Node &);
498 virtual void subdocument(Node &);
499 virtual void nonSgml(Node &);
500 virtual void message(Node &);
501 };
502
503 class GROVE_API PropertyValue {
504 public:
~PropertyValue()505 virtual ~PropertyValue() { }
506 virtual void set(const NodePtr &) = 0;
507 virtual void set(const NodeListPtr &) = 0;
508 virtual void set(const NamedNodeListPtr &) = 0;
509 virtual void set(bool) = 0;
510 virtual void set(GroveChar) = 0;
511 virtual void set(GroveString) = 0;
512 virtual void set(ComponentName::Id) = 0;
513 virtual void set(const ComponentName::Id *) = 0;
514 };
515
516 inline
getChar(const SdataMapper & mapper,GroveChar & c)517 AccessResult Node::getChar(const SdataMapper &mapper, GroveChar &c) const
518 {
519 GroveString str;
520 AccessResult ret = charChunk(mapper, str);
521 if (ret == accessOK)
522 c = str[0];
523 return ret;
524 }
525
526 inline
sameGrove(const Node & node)527 bool Node::sameGrove(const Node &node) const
528 {
529 return groveIndex() == node.groveIndex();
530 }
531
532 inline
getClassName(ComponentName::Id & name)533 AccessResult Node::getClassName(ComponentName::Id &name) const
534 {
535 name = classDef().className;
536 return accessOK;
537 }
538
539 inline
getChildrenPropertyName(ComponentName::Id & name)540 AccessResult Node::getChildrenPropertyName(ComponentName::Id &name) const
541 {
542 const ClassDef &def = classDef();
543 if (def.childrenPropertyName == ComponentName::noId)
544 return accessNull;
545 name = def.childrenPropertyName;
546 return accessOK;
547 }
548
549 inline
getDataPropertyName(ComponentName::Id & name)550 AccessResult Node::getDataPropertyName(ComponentName::Id &name) const
551 {
552 const ClassDef &def = classDef();
553 if (def.dataPropertyName == ComponentName::noId)
554 return accessNull;
555 name = def.dataPropertyName;
556 return accessOK;
557 }
558
559 inline
getDataSepPropertyName(ComponentName::Id & name)560 AccessResult Node::getDataSepPropertyName(ComponentName::Id &name) const
561 {
562 const ClassDef &def = classDef();
563 if (def.dataSepPropertyName == ComponentName::noId)
564 return accessNull;
565 name = def.dataSepPropertyName;
566 return accessOK;
567 }
568
569 inline
getSubnodePropertyNames(const ComponentName::Id * & names)570 AccessResult Node::getSubnodePropertyNames(const ComponentName::Id *&names) const
571 {
572 names = classDef().subnodePropertyNames;
573 return accessOK;
574 }
575
576 inline
getAllPropertyNames(const ComponentName::Id * & names)577 AccessResult Node::getAllPropertyNames(const ComponentName::Id *&names) const
578 {
579 names = classDef().allPropertyNames;
580 return accessOK;
581 }
582
583 #ifdef GROVE_NAMESPACE
584 }
585 #endif
586
587 #endif /* not Node_INCLUDED */
588