// Copyright (c) 1996 James Clark
// See the file COPYING for copying permission.

4 #ifndef Node_INCLUDED
5 #define Node_INCLUDED 1
6 #ifdef __GNUG__
7 #pragma interface
8 #endif
9 
10 #include <stddef.h>
11 
// GROVE_API decorates every class exported from the grove library.
// With SP_USE_DLL it expands to SP_DLLEXPORT while the library itself is
// being built (BUILD_LIBGROVE) and to SP_DLLIMPORT for its clients;
// without SP_USE_DLL it expands to nothing.
#ifdef SP_USE_DLL
#ifdef BUILD_LIBGROVE
#define GROVE_API SP_DLLEXPORT
#else
#define GROVE_API SP_DLLIMPORT
#endif
#else /* not SP_USE_DLL */
#define GROVE_API /* as nothing */
#endif /* not SP_USE_DLL */

// GROVE_NAMESPACE_SCOPE is the qualifier to prepend to grove names:
// "GROVE_NAMESPACE::" when the grove classes live in a namespace,
// and empty otherwise.
#ifdef GROVE_NAMESPACE
#define GROVE_NAMESPACE_SCOPE GROVE_NAMESPACE::
#else
#define GROVE_NAMESPACE_SCOPE
#endif

// Supports the following modules:
// baseabs prlgabs0 instabs basesds0 instsds0 subdcabs

// Everything below is placed in GROVE_NAMESPACE when that macro is defined;
// the namespace is closed again just before the include guard ends.
#ifdef GROVE_NAMESPACE
namespace GROVE_NAMESPACE {
#endif

// GroveChar is the character type used for all grove strings.
//   SP_MULTI_BYTE and SP_WCHAR_T_USHORT : wchar_t
//   SP_MULTI_BYTE only                  : unsigned short
//   neither                             : unsigned char
#ifdef SP_MULTI_BYTE
#ifdef SP_WCHAR_T_USHORT
typedef wchar_t GroveChar;
#else
typedef unsigned short GroveChar;
#endif
#else /* not SP_MULTI_BYTE */
typedef unsigned char GroveChar;
#endif /* not SP_MULTI_BYTE */


// Forward declarations of types that the Node interface refers to
// before they are defined further down in this header.
class NodePtr;
class NodeListPtr;
class NamedNodeListPtr;
class GroveString;
class NodeVisitor;
class SdataMapper;

// Status returned by every property accessor in this header.
enum AccessResult {
  accessOK,			// success
  accessNull,			// value is null
  accessTimeout,                // timed out waiting for property
  accessNotInClass		// property is not defined for class
};

60 struct GROVE_API ComponentName {
61   enum Id {
62     noId = -1,
63     idAllPropertyNames,
64     idApplicationInfo,
65     idAttributeAssignment,
66     idAttributes,
67     idAttributeValueToken,
68     idCdata,
69     idChar,
70     idChildrenPropertyName,
71     idClassName,
72     idContent,
73     idDataChar,
74     idDataPropertyName,
75     idDataSepPropertyName,
76     idDefaulted,
77     idDefaultedEntities,
78     idDocumentElement,
79     idDocumentType,
80     idDoctypesAndLinktypes,
81     idElement,
82     idElements,
83     idEntities,
84     idEntity,
85     idEntityName,
86     idEntityType,
87     idEpilog,
88     idExternalData,
89     idExternalId,
90     idGeneralEntities,
91     idGeneratedSystemId,
92     idGi,
93     idGoverningDoctype,
94     idGoverning,
95     idGroveRoot,
96     idId,
97     idImplied,
98     idIncluded,
99     idMustOmitEndTag,
100     idName,
101     idNdata,
102     idNotation,
103     idNotationName,
104     idNotations,
105     idOrigin,
106     idOriginToSubnodeRelPropertyName,
107     idParent,
108     idPi,
109     idProlog,
110     idPublicId,
111     idReferent,
112     idSdata,
113     idSgmlConstants,
114     idSgmlDocument,
115     idSubdocument,
116     idSubnodePropertyNames,
117     idSystemData,
118     idSystemId,
119     idText,
120     idToken,
121     idTokenSep,
122     idTreeRoot,
123     idValue
124   };
125   enum { nIds = idValue + 1 };
126   static const char *rcsName(Id);
127   static const char *sdqlName(Id);
128 };
129 
130 struct GROVE_API ClassDef {
131   ComponentName::Id className;
132   const ComponentName::Id *allPropertyNames;
133   const ComponentName::Id *subnodePropertyNames;
134   ComponentName::Id childrenPropertyName;
135   ComponentName::Id dataPropertyName;
136   ComponentName::Id dataSepPropertyName;
137 
138   static const ClassDef sgmlDocument;
139   static const ClassDef sgmlConstants;
140   static const ClassDef dataChar;
141   static const ClassDef element;
142   static const ClassDef attributeAssignment;
143   static const ClassDef attributeValueToken;
144   static const ClassDef pi;
145   static const ClassDef sdata;
146   static const ClassDef documentType;
147   static const ClassDef entity;
148   static const ClassDef notation;
149   static const ClassDef externalId;
150   static const ClassDef externalData;
151   static const ClassDef subdocument;
152   static const ClassDef nonSgml;
153   static const ClassDef message;
154 };
155 
// Forward declaration; needed by Node::property().
class PropertyValue;

158 class GROVE_API Node {
159 public:
160   // property values
161   // data in GroveString valid till Node destroyed
162   // default is accessNotInClass
163   // Intrinsic properties.
164   virtual AccessResult getOrigin(NodePtr &) const;
165   virtual AccessResult getParent(NodePtr &) const;
166   virtual AccessResult getGroveRoot(NodePtr &) const;
167   virtual AccessResult getTreeRoot(NodePtr &) const;
168   virtual AccessResult getOriginToSubnodeRelPropertyName(ComponentName::Id &) const = 0;
169   AccessResult getClassName(ComponentName::Id &) const;
170   AccessResult getChildrenPropertyName(ComponentName::Id &) const;
171   AccessResult getDataPropertyName(ComponentName::Id &) const;
172   AccessResult getDataSepPropertyName(ComponentName::Id &) const;
173   AccessResult getSubnodePropertyNames(const ComponentName::Id *&) const;
174   AccessResult getAllPropertyNames(const ComponentName::Id *&) const;
175 
176   // this allows you to apply some operation to a node
177   // according to its grove class
178   virtual void accept(NodeVisitor &) = 0;
179   virtual const ClassDef &classDef() const = 0;
180   // not formally properties
181   virtual AccessResult children(NodeListPtr &) const = 0;
182   virtual AccessResult follow(NodeListPtr &) const = 0;
183   // return accessNull if there isn't a first or next
184   // result accessNotInClass if datatype of otsnr is not node-list or named-node-list
185   virtual AccessResult nextSibling(NodePtr &) const;
186   // works the same as nextSibling(), except that when charChunk()
187   // returns accessOK, returns node following that chunk.
188   virtual AccessResult nextChunkSibling(NodePtr &) const;
189   // if result == accessOK, length must be > 0
190   virtual AccessResult nextChunkAfter(NodePtr &) const;
191   virtual AccessResult charChunk(const SdataMapper &, GroveString &) const;
192   // return accessNotInClass if class doesn't have children property
193   // return accessNull if there isn't a first child
194   virtual AccessResult firstChild(NodePtr &) const;
195   // First of this node's siblings.
196   // accessNotInClass if datatype of otsnr is not node-list or named-node-list
197   virtual AccessResult firstSibling(NodePtr &) const;
198   // The index of this node in the list of all its siblings.
199   virtual AccessResult siblingsIndex(unsigned long &) const;
200   // Has a default implementation in terms of getAttributes and NodeList::ref
201   virtual AccessResult attributeRef(unsigned long, NodePtr &) const;
202   // references the list of the following siblings
203   // 0 is the next sibling
204   // Has a default implementation in terms of nextSibling.
205   virtual AccessResult followSiblingRef(unsigned long, NodePtr &) const;
206   // For a tokenized attribute returns tokens separated by spaces;
207   // null for a non-tokenized attribute.
208   virtual AccessResult tokens(GroveString &) const;
209 
210   // For an element, the number of elements started before it,
211   // that is its zero-based index in a pre-order traversal of
212   // the all the elements in the document.
213   virtual AccessResult elementIndex(unsigned long &) const;
214   // Node identity.
215   // if hash() returns different values for two nodes,
216   // the operator==() must return false for those two nodes.
217   virtual unsigned long hash() const;
218   // Implementation will usually need to call sameGrove().
219   virtual bool operator==(const Node &node) const = 0;
220   bool operator!=(const Node &node) const { return !(*this == node); }
221   // Does this chunk contains nd?
222   virtual bool chunkContains(const Node &nd) const;
223   bool sameGrove(const Node &node) const;
224   typedef const char *IID;
225   virtual bool queryInterface(IID, const void *&) const;
226   // Property on SGML document giving list of parser messages.
227   virtual AccessResult getMessages(NodeListPtr &) const;
228   // Property of message.
229   enum Severity { info, warning, error };
230   virtual AccessResult getSeverity(Severity &) const;
231   AccessResult property(ComponentName::Id, const SdataMapper &, PropertyValue &) const;
232   virtual unsigned groveIndex() const = 0;
233 public:
234   virtual void addRef() = 0;
235   // You must call release rather than use delete.
236   // This is done automatically by NodePtr.
237   virtual void release() = 0;
238 protected:
239   // This enforces this.
240 #ifdef __GNUG__
241   virtual
242 #endif
~Node()243     ~Node() { }
244 public:
245   // This is special.
246   // Implemented in terms of charChunk().
247   AccessResult getChar(const SdataMapper &, GroveChar &) const;
248   // From here on derived algorithmically from property set.
249   // Properties common to several node classes.
250   virtual AccessResult getAttributes(NamedNodeListPtr &) const;
251   virtual AccessResult getName(GroveString &) const;
252   virtual AccessResult getSystemData(GroveString &) const;
253   virtual AccessResult getEntity(NodePtr &) const;
254   virtual AccessResult getEntityName(GroveString &) const;
255   virtual AccessResult getExternalId(NodePtr &) const;
256   virtual AccessResult getNotation(NodePtr &) const;
257   // Properties only on entity
258   virtual AccessResult getText(GroveString &) const;
259   virtual AccessResult getNotationName(GroveString &) const;
260   enum EntityType { text, cdata, sdata, ndata, subdocument, pi };
261   virtual AccessResult getEntityType(EntityType &) const;
262   virtual AccessResult getDefaulted(bool &) const;
263   // Properties only on externalId
264   virtual AccessResult getPublicId(GroveString &) const;
265   virtual AccessResult getSystemId(GroveString &) const;
266   virtual AccessResult getGeneratedSystemId(GroveString &) const;
267   // Properties only on attributeAssignment.
268   virtual AccessResult getValue(NodeListPtr &) const;
269   virtual AccessResult getTokenSep(GroveChar &) const;
270   virtual AccessResult getImplied(bool &) const;
271   // Properties only on element.
272   virtual AccessResult getGi(GroveString &) const;
273   virtual bool hasGi(GroveString) const;
274   virtual AccessResult getId(GroveString &) const;
275   virtual AccessResult getContent(NodeListPtr &) const;
276   virtual AccessResult getIncluded(bool &) const;
277   virtual AccessResult getMustOmitEndTag(bool &) const;
278   // Properties only on attributeValueToken.
279   virtual AccessResult getToken(GroveString &) const;
280   virtual AccessResult getReferent(NodePtr &) const;
281   // Properties only on doctype
282   virtual AccessResult getGoverning(bool &) const;
283   virtual AccessResult getGeneralEntities(NamedNodeListPtr &) const;
284   virtual AccessResult getNotations(NamedNodeListPtr &) const;
285   // Properties only on sgmlDocument.
286   virtual AccessResult getSgmlConstants(NodePtr &) const;
287   virtual AccessResult getApplicationInfo(GroveString &) const;
288   virtual AccessResult getProlog(NodeListPtr &) const;
289   virtual AccessResult getEpilog(NodeListPtr &) const;
290   virtual AccessResult getDocumentElement(NodePtr &) const;
291   virtual AccessResult getElements(NamedNodeListPtr &) const;
292   virtual AccessResult getEntities(NamedNodeListPtr &) const;
293   virtual AccessResult getDefaultedEntities(NamedNodeListPtr &) const;
294   virtual AccessResult getGoverningDoctype(NodePtr &) const;
295   virtual AccessResult getDoctypesAndLinktypes(NamedNodeListPtr &) const;
296   // Properties only on dataChar.
297   // For a non-SGML data character (resulting from a numeric character reference).
298   // Something like this is being added in the HyTime TC.
299   virtual AccessResult getNonSgml(unsigned long &) const;
300 };
301 
302 class GROVE_API NodeList {
303 public:
304   virtual AccessResult first(NodePtr &) const = 0;
305   virtual AccessResult rest(NodeListPtr &) const = 0;
306   virtual AccessResult chunkRest(NodeListPtr &) const = 0;
307   // i is a zero based index
308   // This has a default implementation in terms of first and rest.
309   virtual AccessResult ref(unsigned long i, NodePtr &) const;
310   virtual void release() = 0;
311   virtual void addRef() = 0;
312 protected:
313 #ifdef __GNUG__
314   virtual
315 #endif
~NodeList()316     ~NodeList() { }
317 };
318 
319 class GROVE_API NamedNodeList {
320 public:
321   // This must NOT assume that the string has been normalized.
322   virtual AccessResult namedNode(GroveString, NodePtr &) const = 0;
323   // Do name normalize appropriate for this NamedNodeList.
324   // Returns new size (always <= old size).
325   // This can be used even if list is empty
326   virtual size_t normalize(GroveChar *, size_t) const = 0;
327   // Could have used subtyping here, but accessing NamedNodeList
328   // positionally typically requires different data structure.
329   virtual NodeListPtr nodeList() const = 0;
330   // Use this when you don't care about the order.
331   // May be much more efficient than nodeList().
332   virtual NodeListPtr nodeListNoOrder() const;
333   enum Type {
334     elements,
335     attributes,
336     entities,
337     notations,
338     doctypesAndLinktypes
339   };
340   virtual Type type() const = 0;
341   // If the node is of a class that occurs in the list,
342   // return the value of the property that serves as the name
343   // property for nodes of that class in the named node list.
344   // Return accessNotInClass if the node is not of a class
345   // that occurs in the list.
346   AccessResult nodeName(const NodePtr &, GroveString &) const;
347   virtual void release() = 0;
348   virtual void addRef() = 0;
349 protected:
350 #ifdef __GNUG__
351   virtual
352 #endif
~NamedNodeList()353     ~NamedNodeList() { }
354 };
355 
356 class GROVE_API NodePtr {
357 public:
NodePtr()358   NodePtr() : node_(0) { }
NodePtr(Node * node)359   NodePtr(Node *node) : node_(node) { addRef(); }
~NodePtr()360   ~NodePtr() { release(); }
NodePtr(const NodePtr & ptr)361   NodePtr(const NodePtr &ptr) : node_(ptr.node_) { addRef(); }
362   NodePtr &operator=(const NodePtr &ptr) {
363     ptr.addRef();
364     release();
365     node_ = ptr.node_;
366     return *this;
367   }
368   Node *operator->() const { return node_; }
369   Node &operator*() const { return *node_; }
assignOrigin()370   AccessResult assignOrigin() { return node_->getOrigin(*this); }
assignFirstChild()371   AccessResult assignFirstChild() { return node_->firstChild(*this); }
assignNextSibling()372   AccessResult assignNextSibling() { return node_->nextSibling(*this); }
assignNextChunkSibling()373   AccessResult assignNextChunkSibling() {
374     return node_->nextChunkSibling(*this);
375   }
assignNextChunkAfter()376   AccessResult assignNextChunkAfter() {
377     return node_->nextChunkAfter(*this);
378   }
assignFirstSibling()379   AccessResult assignFirstSibling() { return node_->firstSibling(*this); }
assign(Node * node)380   void assign(Node *node) {
381     if (node)
382       node->addRef();
383     release();
384     node_ = node;
385   }
clear()386   void clear() { release(); node_ = 0; }
387   operator bool() const { return node_ != 0; }
388 private:
addRef()389   void addRef() const { if (node_) node_->addRef(); }
release()390   void release() const { if (node_) node_->release(); }
391   Node *node_;
392 };
393 
394 class GROVE_API NodeListPtr {
395 public:
NodeListPtr()396   NodeListPtr() : list_(0) { }
NodeListPtr(NodeList * list)397   NodeListPtr(NodeList *list) : list_(list) { addRef(); }
~NodeListPtr()398   ~NodeListPtr() { release(); }
NodeListPtr(const NodeListPtr & ptr)399   NodeListPtr(const NodeListPtr &ptr) : list_(ptr.list_) { addRef(); }
400   NodeListPtr &operator=(const NodeListPtr &ptr) {
401     ptr.addRef();
402     release();
403     list_ = ptr.list_;
404     return *this;
405   }
assignRest()406   AccessResult assignRest() { return list_->rest(*this); }
assignChunkRest()407   AccessResult assignChunkRest() { return list_->chunkRest(*this); }
408   NodeList *operator->() const { return list_; }
409   NodeList &operator*() const { return *list_; }
assign(NodeList * list)410   void assign(NodeList *list) {
411     if (list)
412       list->addRef();
413     release();
414     list_ = list;
415   }
clear()416   void clear() { release(); list_ = 0; }
417   operator bool() const { return list_ != 0; }
418 private:
addRef()419   void addRef() const { if (list_) list_->addRef(); }
release()420   void release() const { if (list_) list_->release(); }
421   NodeList *list_;
422 };
423 
424 class GROVE_API NamedNodeListPtr {
425 public:
NamedNodeListPtr()426   NamedNodeListPtr() : list_(0) { }
NamedNodeListPtr(NamedNodeList * list)427   NamedNodeListPtr(NamedNodeList *list) : list_(list) { addRef(); }
~NamedNodeListPtr()428   ~NamedNodeListPtr() { release(); }
NamedNodeListPtr(const NamedNodeListPtr & ptr)429   NamedNodeListPtr(const NamedNodeListPtr &ptr) : list_(ptr.list_) { addRef(); }
430   NamedNodeListPtr &operator=(const NamedNodeListPtr &ptr) {
431     ptr.addRef();
432     release();
433     list_ = ptr.list_;
434     return *this;
435   }
436   NamedNodeList *operator->() const { return list_; }
437   NamedNodeList &operator*() const { return *list_; }
assign(NamedNodeList * list)438   void assign(NamedNodeList *list) {
439     if (list)
440       list->addRef();
441     release();
442     list_ = list;
443   }
clear()444   void clear() { release(); list_ = 0; }
445   operator bool() const { return list_ != 0; }
446 private:
addRef()447   void addRef() const { if (list_) list_->addRef(); }
release()448   void release() const { if (list_) list_->release(); }
449   NamedNodeList *list_;
450 };
451 
452 class GROVE_API GroveString {
453 public:
454   typedef const GroveChar *const_iterator;
GroveString()455   GroveString() : data_(0), size_(0) { }
GroveString(const GroveChar * data,size_t size)456   GroveString(const GroveChar *data, size_t size)
457     : data_(data), size_(size) { }
size()458   size_t size() const { return size_; }
data()459   const GroveChar *data() const { return data_; }
assign(const GroveChar * data,size_t size)460   void assign(const GroveChar *data, size_t size) {
461     data_ = data;
462     size_ = size;
463   }
464   bool operator==(const GroveString &str) const;
465   bool operator!=(const GroveString &str) const { return !(*this == str); }
466   GroveChar operator[](size_t i) const { return data_[i]; }
begin()467   const_iterator begin() const { return data_; }
end()468   const_iterator end() const { return data_ + size_; }
469 private:
470   const GroveChar *data_;
471   size_t size_;
472 };
473 
474 class GROVE_API SdataMapper {
475 public:
476   virtual ~SdataMapper();
477   // Returns a pointer to a single character or null
478   virtual bool sdataMap(GroveString name, GroveString text, GroveChar &) const;
479 };
480 
// NodeVisitor follows the Visitor pattern; see "Design Patterns" (Gamma et al.).

483 class GROVE_API NodeVisitor {
484 public:
485   virtual void sgmlDocument(Node &);
486   virtual void sgmlConstants(Node &);
487   virtual void dataChar(Node &);
488   virtual void element(Node &);
489   virtual void attributeAssignment(Node &);
490   virtual void attributeValueToken(Node &);
491   virtual void pi(Node &);
492   virtual void sdata(Node &);
493   virtual void documentType(Node &);
494   virtual void entity(Node &);
495   virtual void notation(Node &);
496   virtual void externalId(Node &);
497   virtual void externalData(Node &);
498   virtual void subdocument(Node &);
499   virtual void nonSgml(Node &);
500   virtual void message(Node &);
501 };
502 
503 class GROVE_API PropertyValue {
504 public:
~PropertyValue()505   virtual ~PropertyValue() { }
506   virtual void set(const NodePtr &) = 0;
507   virtual void set(const NodeListPtr &) = 0;
508   virtual void set(const NamedNodeListPtr &) = 0;
509   virtual void set(bool) = 0;
510   virtual void set(GroveChar) = 0;
511   virtual void set(GroveString) = 0;
512   virtual void set(ComponentName::Id) = 0;
513   virtual void set(const ComponentName::Id *) = 0;
514 };
515 
516 inline
getChar(const SdataMapper & mapper,GroveChar & c)517 AccessResult Node::getChar(const SdataMapper &mapper, GroveChar &c) const
518 {
519   GroveString str;
520   AccessResult ret = charChunk(mapper, str);
521   if (ret == accessOK)
522     c = str[0];
523   return ret;
524 }
525 
526 inline
sameGrove(const Node & node)527 bool Node::sameGrove(const Node &node) const
528 {
529   return groveIndex() == node.groveIndex();
530 }
531 
532 inline
getClassName(ComponentName::Id & name)533 AccessResult Node::getClassName(ComponentName::Id &name) const
534 {
535   name = classDef().className;
536   return accessOK;
537 }
538 
539 inline
getChildrenPropertyName(ComponentName::Id & name)540 AccessResult Node::getChildrenPropertyName(ComponentName::Id &name) const
541 {
542   const ClassDef &def = classDef();
543   if (def.childrenPropertyName == ComponentName::noId)
544     return accessNull;
545   name = def.childrenPropertyName;
546   return accessOK;
547 }
548 
549 inline
getDataPropertyName(ComponentName::Id & name)550 AccessResult Node::getDataPropertyName(ComponentName::Id &name) const
551 {
552   const ClassDef &def = classDef();
553   if (def.dataPropertyName == ComponentName::noId)
554     return accessNull;
555   name = def.dataPropertyName;
556   return accessOK;
557 }
558 
559 inline
getDataSepPropertyName(ComponentName::Id & name)560 AccessResult Node::getDataSepPropertyName(ComponentName::Id &name) const
561 {
562   const ClassDef &def = classDef();
563   if (def.dataSepPropertyName == ComponentName::noId)
564     return accessNull;
565   name = def.dataSepPropertyName;
566   return accessOK;
567 }
568 
569 inline
getSubnodePropertyNames(const ComponentName::Id * & names)570 AccessResult Node::getSubnodePropertyNames(const ComponentName::Id *&names) const
571 {
572   names = classDef().subnodePropertyNames;
573   return accessOK;
574 }
575 
576 inline
getAllPropertyNames(const ComponentName::Id * & names)577 AccessResult Node::getAllPropertyNames(const ComponentName::Id *&names) const
578 {
579    names = classDef().allPropertyNames;
580    return accessOK;
581 }
582 
// Closes the namespace opened near the top of this header, if any.
#ifdef GROVE_NAMESPACE
}
#endif

587 #endif /* not Node_INCLUDED */
588