1 /*
2   Copyright (c) 2006 - 2021
3   CLST  - Radboud University
4   ILK   - Tilburg University
5 
6   This file is part of libfolia
7 
8   libfolia is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3 of the License, or
11   (at your option) any later version.
12 
13   libfolia is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, see <http://www.gnu.org/licenses/>.
20 
21   For questions and suggestions, see:
22       https://github.com/LanguageMachines/ticcutils/issues
23   or send mail to:
24       lamasoftware (at ) science.ru.nl
25 */
26 
27 #ifndef FOLIA_IMPL_H
28 #define FOLIA_IMPL_H
29 
30 #include <set>
31 #include <map>
32 #include <vector>
33 #include <string>
34 #include <iostream>
35 #include <exception>
36 #include "unicode/unistr.h"
37 #include "libxml/tree.h"
38 
39 #include "libfolia/folia_properties.h"
40 #include "libfolia/folia_metadata.h"
41 #include "libfolia/folia_textpolicy.h"
42 
43 using namespace icu;
44 
45 namespace folia {
46   class Document;
47   class AbstractSpanAnnotation;
48   class Alternative;
49   class PosAnnotation;
50   class LemmaAnnotation;
51   class SenseAnnotation;
52   class DomainAnnotation;
53   class MorphologyLayer;
54   class Morpheme;
55   class Phoneme;
56   class Sentence;
57   class Word;
58   class TextContent;
59   class PhonContent;
60   class Correction;
61   class New;
62   class Original;
63   class Current;
64   class Suggestion;
65   class Division;
66   class DependencyDependent;
67   class Paragraph;
68   class Morpheme;
69   class MetaData;
70 
71   /// class used to steer 'select()' behaviour
72   enum class SELECT_FLAGS {
73     RECURSE=0,  /*!< recurse the whole FoLia from the given node downwards
74 		  returning all matching nodes, even within matches.
75 		  This is the default.
76 		*/
77       LOCAL=1,  //!< only just look in the direct sibblings of the given node
78       TOP_HIT=2 //!< like recurse, but do NOT recurse into sibblings of matching nodes
79       };
80 
81   /// class used to steer 'xml:space' behaviour
82   enum class SPACE_FLAGS {
83     UNSET=-1,  //!< not yet known
84       DEFAULT=0,  //!< the default behaviour
85       PRESERVE=1 //!< spaces should be preserved
86       };
87 
88 
// Stand-in function body for members a class does not support.
// Expands to a complete compound statement that throws a NotImplementedError
// whose message is the xmltag() of the object, "::" and the name of the
// enclosing function. It is written in place of a function body.
#define NOT_IMPLEMENTED {						\
    throw NotImplementedError( xmltag() + "::" + __func__ );		\
  }
92 
93   class FoliaElement {
94     friend std::ostream& operator<<( std::ostream&, const FoliaElement& );
95     friend std::ostream& operator<<( std::ostream&, const FoliaElement* );
96     friend bool operator==( const FoliaElement&, const FoliaElement& );
97     friend void destroy( FoliaElement * );
98   protected:
~FoliaElement()99     virtual ~FoliaElement(){};
100   public:
101     virtual void destroy() = 0;
init()102     virtual void init() {};
103     virtual size_t size() const = 0;
104     virtual FoliaElement* index( size_t ) const = 0;
105     virtual FoliaElement* rindex( size_t ) const = 0;
106     FoliaElement* operator[]( size_t i ) const {
107       return index(i);
108     }
109 
110     virtual bool isinstance( ElementType et ) const = 0;
111 
112     template <typename F>
isinstance()113       bool isinstance() const {
114       return element_id() == F::PROPS.ELEMENT_ID;
115     }
116 
117     template <typename T>
add_child(KWargs & args)118       inline T *add_child( KWargs& args ){
119       /// create a new FoliaElement of type T as child of this
120       /*!
121 	\param args an attribute-value list of arguments to use
122 	\return a new FoliaElement
123 	may throw if a Document is required, but not available
124       */
125       return new T( args, this );
126     }
127 
128     template <typename T>
add_child()129       inline T *add_child(){
130       /// create a new FoliaElement of type T as child of this
131       /*!
132 	\return a new FoliaElement
133 	may throw if a Document is required, but not available
134       */
135       return new T( this );
136     }
137 
138     template <typename T>
add_child(const std::string & txt)139       inline T *add_child( const std::string& txt ){
140       /// create a new XmlText as child of 'this'
141       /*!
142 	\param txt an value to be assigned as a "text" attribute
143 	\return a new FoliaElement
144 	this will not complie for any class that has NO IMPLEMENTATION for
145 	setvalue(). (which ar most classes)
146       */
147       T *result = new T(this);
148       result->setvalue( txt );
149       return result;
150     }
151 
152     bool isSubClass( ElementType ) const;
isSubClass(const FoliaElement * c)153     bool isSubClass( const FoliaElement *c ) const {
154       /// check if the object is a subclass of the class of \e c
155       /*!
156 	\param c the FoliaElement we would like to compare to
157 	\return true if the object is a SubClass of c.
158 	This is about C++ class inheritance: is our class a derivative of c's
159 	class?
160       */
161       return dynamic_cast<decltype(c)>(this) != 0;
162     };
163 
164     virtual void assignDoc( Document* ) = 0;
165     virtual FoliaElement *parent() const = 0;
166     virtual void set_parent( FoliaElement *p ) = 0;
167     virtual bool acceptable( ElementType ) const = 0;
168     virtual bool addable( const FoliaElement * ) const = 0;
169     virtual FoliaElement *append( FoliaElement* ) = 0;
170     virtual FoliaElement *postappend( ) = 0;
171     virtual void remove( FoliaElement * ) = 0;
172     virtual std::vector<FoliaElement*> find_replacables( FoliaElement * ) const = 0;
173     virtual void replace( FoliaElement * ) = 0;
174     virtual FoliaElement* replace( FoliaElement *, FoliaElement* ) = 0;
175     virtual void insert_after( FoliaElement *, FoliaElement * ) = 0;
176     virtual const std::vector<FoliaElement*>& data() const = 0;
177     virtual FoliaElement *head() const NOT_IMPLEMENTED;
178 
179     // Sentences
180     virtual Sentence *addSentence( const KWargs& ) = 0;
181     Sentence *addSentence( const std::string& s ="" ){
182       return addSentence( getArgs(s) );
183     };
184 
185     // Selections
186 
187     template <typename F>
188       std::vector<F*> select( const std::string& st,
189 			      const std::set<ElementType>& exclude,
190 			      bool recurse = true ) const {
191       std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID,
192 					       st,
193 					       exclude,
194 					       (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) );
195       std::vector<F*> res;
196       for ( size_t i = 0; i < tmp.size(); ++i ){
197 	res.push_back( dynamic_cast<F*>( tmp[i]) );
198       }
199       return res;
200     }
201 
202     template <typename F>
203       std::vector<F*> select( const std::string& st,
204 			      bool recurse = true ) const {
205       std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID,
206 					       st,
207 					       (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) );
208       std::vector<F*> res;
209       for ( size_t i = 0; i < tmp.size(); ++i ){
210 	res.push_back( dynamic_cast<F*>( tmp[i]) );
211       }
212       return res;
213     }
214 
215     template <typename F>
216       std::vector<F*> select( const char* st,
217 			      bool recurse = true ) const {
218       std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID,
219 					       std::string(st),
220 					       (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) );
221       std::vector<F*> res;
222       for ( size_t i = 0; i < tmp.size(); ++i ){
223 	res.push_back( dynamic_cast<F*>( tmp[i]) );
224       }
225       return res;
226     }
227 
228     template <typename F>
229       std::vector<F*> select( const std::set<ElementType>& exclude,
230 			      bool recurse = true ) const {
231       std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID,
232 					       exclude,
233 					       (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) );
234       std::vector<F*> res;
235       for ( size_t i = 0; i < tmp.size(); ++i ){
236 	res.push_back( dynamic_cast<F*>( tmp[i]) );
237       }
238       return res;
239     }
240 
241     template <typename F>
242       std::vector<F*> select( bool recurse = true ) const {
243       std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID,
244 					       (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) );
245       std::vector<F*> res;
246       for ( size_t i = 0; i < tmp.size(); ++i ){
247 	res.push_back( dynamic_cast<F*>( tmp[i]) );
248       }
249       return res;
250     }
251 
252     // annotations
253 
allowannotations()254     virtual bool allowannotations() const { return false; };
255     virtual const std::string annotator( ) const = 0;
256     virtual void annotator( const std::string& ) = 0;
257     virtual AnnotatorType annotatortype() const = 0;
258     virtual const std::string processor() const = 0;
259     virtual void processor( const std::string& ) = 0;
260     virtual void annotatortype( AnnotatorType t ) =  0;
261     virtual AnnotationType annotation_type() const = 0;
262     virtual PosAnnotation *addPosAnnotation( const KWargs& ) NOT_IMPLEMENTED;
263     virtual LemmaAnnotation *addLemmaAnnotation( const KWargs& ) NOT_IMPLEMENTED;
264     virtual MorphologyLayer *addMorphologyLayer( const KWargs& ) NOT_IMPLEMENTED;
265 
266     virtual PosAnnotation *getPosAnnotations( const std::string&,
267 					      std::vector<PosAnnotation*>& ) const NOT_IMPLEMENTED;
268     virtual LemmaAnnotation *getLemmaAnnotations( const std::string&,
269 						  std::vector<LemmaAnnotation*>& ) const NOT_IMPLEMENTED;
270     virtual MorphologyLayer *getMorphologyLayers( const std::string&,
271 						  std::vector<MorphologyLayer*>& ) const NOT_IMPLEMENTED;
272 
273     virtual const MetaData *get_metadata() const = 0;
274     virtual const std::string get_metadata( const std::string& ) const = 0;
275 
276     template <typename F>
277       std::vector<F*> annotations( const std::string& s = "" ) const {
278       if ( allowannotations() ){
279 	return select<F>( s, default_ignore_annotations );
280       }
281       else NOT_IMPLEMENTED;
282     }
283 
284     template <typename F>
285       bool has_annotation( const std::string& st = "" ) const {
286       std::vector<F*> v = annotations<F>( st );
287       return v.size() > 0;
288     }
289 
290     template <typename F>
291       F *annotation( const std::string& st = "" ) const {
292       std::vector<F*>v = annotations<F>( st );
293       if ( v.size() > 0 ){
294 	return v[0];
295       }
296       else {
297 	return 0;
298       }
299     }
300 
301     template <typename F>
addAnnotation(const KWargs & args)302       F *addAnnotation( const KWargs& args ) {
303       F *res = 0;
304       try {
305 	res = new F( args, doc() );
306       }
307       catch( std::exception& ){
308 	if ( res ){
309 	  res->destroy();
310 	}
311 	throw;
312       }
313       append( res );
314       return res;
315     }
316     // span annotation
317     virtual std::vector<AbstractSpanAnnotation*> selectSpan() const = 0;
318     virtual std::vector<AbstractSpanAnnotation*> findspans( ElementType,
319 							    const std::string& = "" ) const NOT_IMPLEMENTED;
320     template <typename F>
321       std::vector<AbstractSpanAnnotation*> findspans( const std::string& st = "" ) const {
322       return findspans( F::PROPS.ELEMENT_ID, st );
323     }
324     virtual AbstractSpanAnnotation *findspan( const std::vector<FoliaElement*>& ) const NOT_IMPLEMENTED;
325 
326     // features
327     virtual std::vector<std::string> feats( const std::string& ) const = 0;
328     virtual const std::string feat( const std::string& ) const = 0;
329 
330     //XML (de)serialisation
331     virtual FoliaElement* parseXml( const xmlNode * ) = 0;
332     const std::string xmlstring( bool=true ) const; // serialize to a string (XML fragment)
333     const std::string xmlstring( bool, int=0, bool=true ) const; // serialize to a string (XML fragment)
334     virtual xmlNode *xml( bool, bool = false ) const = 0; //serialize to XML
335 
336     // text/string content
337     bool hastext( const std::string& = "current" ) const;
338     bool hasphon( const std::string& = "current" ) const;
339     virtual void check_text_consistency(bool = true) const = 0;
340     virtual void check_text_consistency_while_parsing( bool = true,
341 						       bool = false ) = 0; //can't we merge these two somehow?
342     virtual void check_append_text_consistency( const FoliaElement * ) const = 0;
343 
344     virtual const std::string str( const std::string& = "current" ) const = 0;
345     virtual const std::string str( const TextPolicy& ) const = 0;
346 
347     const UnicodeString unicode( const std::string& cls = "current",
348 				 bool debug=false ) const {
349       return text( cls, TEXT_FLAGS::NONE, debug ); };
350 
351     virtual UnicodeString text_container_text( const TextPolicy& ) const = 0;
352     virtual const UnicodeString private_text( const TextPolicy& ) const = 0;
353     virtual const UnicodeString text( const TextPolicy & ) const = 0;
354     virtual const UnicodeString text( const std::string&,
355 				      TEXT_FLAGS = TEXT_FLAGS::NONE,
356 				      bool = false ) const = 0;
357     virtual const UnicodeString text( TEXT_FLAGS = TEXT_FLAGS::NONE,
358 				      bool = false ) const = 0;
359     const UnicodeString stricttext( const std::string& = "current" ) const;
360     const UnicodeString toktext( const std::string& = "current" ) const;
361     virtual const UnicodeString phon( const TextPolicy& ) const = 0;
362     virtual const UnicodeString phon( const std::string&,
363 				      TEXT_FLAGS = TEXT_FLAGS::NONE ) const = 0;
364     virtual const UnicodeString phon( TEXT_FLAGS = TEXT_FLAGS::NONE ) const = 0;
365     virtual bool printable() const = 0;
366     virtual bool speakable() const = 0;
367     virtual bool referable() const = 0;
368     virtual bool is_textcontainer() const = 0;
369     virtual bool is_phoncontainer() const = 0;
370     virtual bool implicitspace() const = 0;
371     virtual const std::string& text_delimiter() const = 0;
372     // Word
373     virtual Word *previous() const NOT_IMPLEMENTED;
374     virtual Word *next() const NOT_IMPLEMENTED;
375     virtual const Word* resolveword( const std::string& ) const = 0;
376     virtual std::vector<Word*> context( size_t,
377 					const std::string& ="" ) const NOT_IMPLEMENTED;
378     virtual std::vector<Word*> leftcontext( size_t,
379 					    const std::string& ="" ) const NOT_IMPLEMENTED;
380     virtual std::vector<Word*> rightcontext( size_t,
381 					     const std::string& ="" ) const NOT_IMPLEMENTED;
382     virtual Word *addWord( const KWargs& ) = 0;
383     virtual Word *addWord( const std::string& ="" ) = 0;
384 
385     // corrections
386     virtual New *getNew() const NOT_IMPLEMENTED;
387     virtual FoliaElement *getNew( size_t ) const NOT_IMPLEMENTED;
388     virtual Original *getOriginal() const NOT_IMPLEMENTED;
389     virtual FoliaElement *getOriginal( size_t ) const NOT_IMPLEMENTED;
390     virtual Current *getCurrent() const NOT_IMPLEMENTED;
391     virtual FoliaElement *getCurrent( size_t ) const NOT_IMPLEMENTED;
392     virtual Correction *incorrection() const NOT_IMPLEMENTED;
393     virtual Correction *split( FoliaElement *, FoliaElement *,
394 			       const std::string& = "" ) NOT_IMPLEMENTED;
395 
396     virtual Correction *mergewords( FoliaElement *,
397 				    const std::vector<FoliaElement *>&,
398 				    const std::string& = "" ) NOT_IMPLEMENTED;
399 
400     virtual Correction *deleteword( FoliaElement *,
401 				    const std::string& = "" ) NOT_IMPLEMENTED;
402     virtual Correction *insertword( FoliaElement *, FoliaElement *,
403 				    const std::string& = "" ) NOT_IMPLEMENTED;
404     virtual std::vector<Suggestion*> suggestions() const NOT_IMPLEMENTED;
405     virtual Suggestion *suggestions( size_t ) const NOT_IMPLEMENTED;
406 
407     virtual Correction *correct( const std::vector<FoliaElement*>&,
408 				 const std::vector<FoliaElement*>&,
409 				 const std::vector<FoliaElement*>&,
410 				 const std::vector<FoliaElement*>&,
411 				 const KWargs& ) NOT_IMPLEMENTED;
412     virtual Correction* correct( FoliaElement*,
413 				 FoliaElement*,
414 				 const KWargs& ) NOT_IMPLEMENTED;
415     virtual Correction* correct( FoliaElement*,
416 				 FoliaElement*,
417 				 const std::vector<FoliaElement*>&,
418 				 const KWargs& ) NOT_IMPLEMENTED;
419     virtual Correction *correct( const std::string& = "" ) NOT_IMPLEMENTED;
420 
421     // TextContent
422     virtual const TextContent *text_content( const TextPolicy& ) const = 0;
423     virtual const TextContent *text_content( const std::string& = "current",
424 					     bool debug = false ) const = 0;
425     TextContent *settext( const std::string&,
426 			  const std::string& = "current" );
427     TextContent *settext( const std::string&,
428 			  int,
429 			  const std::string& = "current" );
430     TextContent *setutext( const UnicodeString&,
431 			   const std::string& = "current" );
432     TextContent *setutext( const UnicodeString&,
433 			   int ,
434 			   const std::string& = "current" );
435     virtual int offset() const NOT_IMPLEMENTED;
436     virtual void set_offset( int ) const NOT_IMPLEMENTED;
437 
438     void clear_textcontent( const std::string& = "current" );
439     // PhonContent
440     virtual const PhonContent *phon_content( const TextPolicy& ) const = 0;
441     virtual const PhonContent *phon_content( const std::string& = "current",
442 					     bool debug=false ) const = 0;
443 
444     // properties
445     virtual const std::string& get_delimiter( const TextPolicy& ) const = 0;
446     virtual void setDateTime( const std::string& ) = 0;
447     virtual const std::string getDateTime() const = 0;
448     virtual const std::string pos( const std::string& = "" ) const NOT_IMPLEMENTED;
449     virtual const std::string lemma( const std::string& = "" ) const NOT_IMPLEMENTED;
450     virtual const std::string cls() const = 0;
451     virtual void set_cls( const std::string& ) = 0;
452     virtual const std::string sett() const = 0;
453     virtual void set_set( const std::string& ) = 0;
454     virtual const std::string n() const = 0;
455     virtual void set_n( const std::string& ) = 0;
456     virtual const std::string tag() const = 0;
457     virtual const std::string set_tag( const std::string& ) = 0;
458     virtual const std::string id() const = 0;
459     virtual const std::string begintime() const = 0;
460     virtual void set_begintime( const std::string& ) = 0;
461     virtual const std::string endtime() const = 0;
462     virtual void set_endtime( const std::string& ) = 0;
463     virtual const std::string speech_src() const = 0;
464     virtual void set_speech_src( const std::string& ) = 0;
465     virtual const std::string speech_speaker() const = 0;
466     virtual void set_speech_speaker( const std::string& ) = 0;
467     virtual const std::string language( const std::string& = "" ) const = 0;
468     virtual const std::string set_to_current() NOT_IMPLEMENTED;
469     virtual double confidence() const = 0;
470     virtual void set_confidence( double ) = 0;
471     virtual void confidence( double ) = 0; // deprecated
472     virtual bool space() const = 0;
473     virtual bool set_space( bool ) = 0;
474     virtual SPACE_FLAGS spaces_flag() const = 0;
475     virtual void set_spaces_flag( SPACE_FLAGS ) = 0;
476     virtual ElementType element_id() const = 0;
477     virtual size_t occurrences() const = 0;
478     virtual size_t occurrences_per_set() const = 0;
479     virtual Attrib required_attributes() const = 0;
480     virtual Attrib optional_attributes() const = 0;
481     virtual const std::string& xmltag() const = 0;
classname()482     const std::string& classname() const { return xmltag(); }; //synomym
483     virtual const std::string& default_subset() const = 0;
484     virtual const std::string subset() const NOT_IMPLEMENTED;
485     virtual bool setonly() const = 0;
486     virtual bool auto_generate_id() const = 0;
487     virtual Document *doc() const = 0;
488     virtual Sentence *sentence() const NOT_IMPLEMENTED;
489     virtual Paragraph *paragraph() const NOT_IMPLEMENTED;
490     virtual Division *division() const NOT_IMPLEMENTED;
491     virtual std::vector<Paragraph*> paragraphs() const NOT_IMPLEMENTED;
492     virtual std::vector<Sentence*> sentences() const NOT_IMPLEMENTED;
493     virtual std::vector<Word*> words( const std::string& ="" ) const NOT_IMPLEMENTED;
494     virtual std::vector<FoliaElement*> wrefs() const NOT_IMPLEMENTED;
495     virtual FoliaElement* wrefs( size_t ) const NOT_IMPLEMENTED;
496 
497     virtual std::vector<Morpheme*> morphemes( const std::string& ="" ) const NOT_IMPLEMENTED;
498     virtual Morpheme* morpheme( size_t, const std::string& ="" ) const NOT_IMPLEMENTED;
499     virtual Sentence *sentences( size_t ) const NOT_IMPLEMENTED;
500     virtual Sentence *rsentences( size_t ) const NOT_IMPLEMENTED;
501     virtual Paragraph *paragraphs( size_t ) const NOT_IMPLEMENTED;
502     virtual Paragraph *rparagraphs( size_t ) const NOT_IMPLEMENTED;
503     virtual Word *words( size_t, const std::string& ="" ) const NOT_IMPLEMENTED;
504     virtual std::vector<Word *> wordParts() const NOT_IMPLEMENTED;
505     virtual Word *rwords( size_t, const std::string& ="" ) const NOT_IMPLEMENTED;
506 
507     virtual DependencyDependent *dependent() const NOT_IMPLEMENTED;
508 
509     virtual const std::string description() const;
510 
511     // alternatives
512     virtual std::vector<Alternative *> alternatives( ElementType,
513 						     const std::string& = ""
514 						     ) const NOT_IMPLEMENTED;
515     std::vector<Alternative*> alternatives( const std::string& s = "" ) const {
516       return alternatives( BASE, s );
517     }
518 
519     virtual const std::string content() const NOT_IMPLEMENTED;
520     virtual const std::string src() const NOT_IMPLEMENTED;
521     virtual const UnicodeString caption() const NOT_IMPLEMENTED;
522     virtual std::vector<FoliaElement *> resolve() const NOT_IMPLEMENTED;
523     virtual const FoliaElement* resolveid() const NOT_IMPLEMENTED;
524     virtual bool checkAtts() = 0;
525     virtual const UnicodeString deeptext( const TextPolicy& ) const NOT_IMPLEMENTED;
526     virtual const UnicodeString deepphon( const TextPolicy& ) const NOT_IMPLEMENTED;
527 
528 
529     virtual std::vector<FoliaElement*> select( ElementType,
530 					       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0;
531     virtual std::vector<FoliaElement*> select( ElementType,
532 					       const std::set<ElementType>& ,
533 					       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0;
534     virtual std::vector<FoliaElement*> select( ElementType,
535 					       const std::string&,
536 					       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0;
537     virtual std::vector<FoliaElement*> select( ElementType,
538 					       const std::string&,
539 					       const std::set<ElementType>& ,
540 					       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0;
541     // some 'internal stuff
542     virtual int refcount() const = 0;
543     virtual void increfcount() = 0;
544     virtual void decrefcount() = 0;
545     virtual void resetrefcount() = 0;
546     virtual void setAttributes( KWargs& ) = 0;
547     virtual KWargs collectAttributes() const = 0;
548     virtual void setAuth( bool b ) = 0;
549     virtual bool auth( ) const = 0;
550     virtual bool xlink() const = 0;
551     virtual const std::string href() const NOT_IMPLEMENTED;
552     virtual const std::string generateId( const std::string& ) NOT_IMPLEMENTED;
553     virtual const std::string textclass() const NOT_IMPLEMENTED;
554     virtual void unravel( std::set<FoliaElement*>& ) NOT_IMPLEMENTED;
555     static FoliaElement *private_createElement( ElementType );
556   public:
557     static FoliaElement *createElement( ElementType, Document * =0 );
558     static FoliaElement *createElement( const std::string&, Document * =0 );
559 
560   };
561 
562   class AbstractElement: public virtual FoliaElement {
563     friend void destroy( FoliaElement * );
564   private:
565     //Constructor
566     AbstractElement( const AbstractElement& ); // inhibit copies
567     AbstractElement& operator=( const AbstractElement& ); // inhibit copies
568   protected:
569     AbstractElement( const properties& p, Document* = 0 );
570     AbstractElement( const properties& p, FoliaElement * );
571     virtual ~AbstractElement();
572   public:
573     void destroy();
574     void classInit();
575     void classInit( const KWargs& );
576 
577     //functions regarding contained data
size()578     size_t size() const { return _data.size(); };
579     FoliaElement* index( size_t ) const;
580     FoliaElement* rindex( size_t ) const;
581 
isinstance(ElementType et)582     bool isinstance( ElementType et ) const {
583       /// return true when the object is an instance of the type parameter
584       /*!
585       \param et the type to check against
586     */
587       return et == element_id();
588     }
589 
590     void assignDoc( Document* );
parent()591     FoliaElement *parent() const { return _parent; };
set_parent(FoliaElement * p)592     void set_parent( FoliaElement *p ) { _parent = p ; };
593 
594     // modify the internal data
595     FoliaElement *append( FoliaElement* );
596     FoliaElement *postappend( );
597     void remove( FoliaElement * );
598     std::vector<FoliaElement*> find_replacables( FoliaElement * ) const;
599     void replace( FoliaElement * );
600     FoliaElement* replace( FoliaElement *, FoliaElement* );
601     void insert_after( FoliaElement *, FoliaElement * );
data()602     const std::vector<FoliaElement*>& data() const { return _data; };
603 
604     // Sentences
605     Sentence *addSentence( const KWargs& );
606 
607     // MetaData
608     const MetaData *get_metadata() const;
609     const std::string get_metadata( const std::string&  ) const;
610 
611     // Selections
612     template <typename F>
613       std::vector<F*> select( bool recurse = true ) const {
614       return FoliaElement::select<F>(recurse);
615     }
616 
617     template <typename F>
618       std::vector<F*> select( const std::string& st,
619 			      const std::set<ElementType>& exclude,
620 			      bool recurse = true ) const {
621       return FoliaElement::select<F>( st, exclude, recurse );
622     }
623 
624     template <typename F>
625       std::vector<F*> select( const std::string& st,
626 			      bool recurse = true ) const {
627       return FoliaElement::select<F>( st, recurse );
628     }
629 
630     template <typename F>
631       std::vector<F*> select( const char* st,
632 			      bool recurse = true ) const {
633       return FoliaElement::select<F>( st, recurse );
634     }
635 
636     template <typename F>
637       std::vector<F*> select( const std::set<ElementType>& exclude,
638 			      bool recurse = true ) const {
639       return FoliaElement::select<F>( exclude, recurse );
640     }
641 
annotator()642     const std::string annotator( ) const { return _annotator; };
annotator(const std::string & a)643     void annotator( const std::string& a ) { _annotator = a; };
processor()644     const std::string processor( ) const { return _processor; };
processor(const std::string & p)645     void processor( const std::string& p ) { _processor = p; };
annotatortype()646     AnnotatorType annotatortype() const { return _annotator_type; };
annotatortype(AnnotatorType t)647     void annotatortype( AnnotatorType t ) { _annotator_type =  t; };
648 
649     template <typename F>
addAnnotation(const KWargs & args)650       F *addAnnotation( const KWargs& args ) {
651       return FoliaElement::addAnnotation<F>( args );
652     }
653 
654     // Span annotations
655     std::vector<AbstractSpanAnnotation*> selectSpan() const;
656 
657     // features
658     std::vector<std::string> feats( const std::string& ) const;
659     const std::string feat( const std::string& ) const;
660 
661     //XML parsing
662     FoliaElement* parseXml( const xmlNode * );
663 
664     // text/string content
665 
666     const std::string str( const std::string& = "current" ) const;
667     const std::string str( const TextPolicy& ) const;
668 
669     const UnicodeString private_text( const TextPolicy& ) const;
670     const UnicodeString text( const TextPolicy & ) const;
671     const UnicodeString text( const std::string&,
672 			      TEXT_FLAGS = TEXT_FLAGS::NONE,
673 			      bool = false ) const;
674     const UnicodeString text( TEXT_FLAGS flags = TEXT_FLAGS::NONE,
675 			      bool debug = false ) const {
676       return text( "current", flags, debug );
677     }
678 
679     const UnicodeString phon( const TextPolicy& ) const;
680     const UnicodeString phon( const std::string&,
681 			      TEXT_FLAGS = TEXT_FLAGS::NONE ) const;
682     const UnicodeString phon( TEXT_FLAGS flags = TEXT_FLAGS::NONE ) const {
683       return phon( "current", flags );
684     }
685 
686     const UnicodeString deeptext( const TextPolicy& ) const;
687     const UnicodeString deepphon( const TextPolicy& ) const;
688 
689     // Word
resolveword(const std::string &)690     const Word* resolveword( const std::string& ) const { return 0; };
691     Word *addWord( const KWargs& );
692     Word *addWord( const std::string& ="" );
693     // TextContent
694     const TextContent *text_content( const TextPolicy& ) const;
695     const TextContent *text_content( const std::string& = "current",
696 				     bool = false ) const;
697     // PhonContent
698     const PhonContent *phon_content( const TextPolicy& tp ) const;
699     const PhonContent *phon_content( const std::string& = "current",
700 				     bool = false ) const;
701 
702     // properties
703     const std::string& get_delimiter( const TextPolicy& ) const;
704 
705     // attributes
cls()706     const std::string cls() const { return _class; };
set_cls(const std::string & cls)707     void set_cls( const std::string& cls ){ _class = cls; };
update_cls(const std::string & c)708     void update_cls( const std::string& c ) { set_cls( c ); } // deprecated
709 
sett()710     const std::string sett() const { return _set; };
set_set(const std::string & st)711     void set_set( const std::string& st ){ _set = st; };
712 
tag()713     const std::string tag() const { return _tags; };
714     const std::string set_tag( const std::string&  );
settag(const std::string & t)715     const std::string settag( const std::string& t ){
716       return set_tag(t); };                              //deprecated
717 
n()718     const std::string n() const { return _n; };
set_n(const std::string & n)719     void set_n( const std::string& n ){ _n = n; };
720 
id()721     const std::string id() const { return _id; };
722 
begintime()723     const std::string begintime() const { return _begintime; };
set_begintime(const std::string & bt)724     void set_begintime( const std::string& bt ){ _begintime = bt; };
725 
endtime()726     const std::string endtime() const { return _endtime; };
set_endtime(const std::string & bt)727     void set_endtime( const std::string& bt ){ _endtime = bt; };
728 
textclass()729     const std::string textclass() const { return _textclass; };
textclass(const std::string & tc)730     void textclass( const std::string& tc ){ _textclass = tc; };
731 
732     const std::string speech_src() const;
733     void set_speech_src( const std::string& ) NOT_IMPLEMENTED;
734 
735     const std::string speech_speaker() const;
736     void set_speech_speaker( const std::string& ) NOT_IMPLEMENTED;
737 
space()738     bool space() const { return _space; };
set_space(bool b)739     bool set_space( bool b ) { bool s =_space; _space =  b; return s; };
740 
spaces_flag()741     SPACE_FLAGS spaces_flag() const { return _preserve_spaces; };
set_spaces_flag(SPACE_FLAGS f)742     void set_spaces_flag( SPACE_FLAGS f ) { _preserve_spaces = f; };
743 
confidence()744     double confidence() const { return _confidence; };
confidence(double d)745     void confidence( double d ) { _confidence = d; };
set_confidence(double d)746     void set_confidence( double d ) { _confidence = d; };
747 
748     const std::string language( const std::string& = "" ) const;
src()749     const std::string src() const { return _src; };
750     // generic properties
751     ElementType element_id() const;
752     size_t occurrences() const;
753     size_t occurrences_per_set() const;
754     Attrib required_attributes() const;
755     Attrib optional_attributes() const;
756     bool hidden() const;
757     const std::string& xmltag() const;
758     const std::string& default_subset() const;
759     AnnotationType annotation_type() const;
760     const std::set<ElementType>& accepted_data() const;
761     const std::set<ElementType>& required_data() const;
762     bool printable() const;
763     bool speakable() const;
764     bool referable() const;
765     bool is_textcontainer() const;
766     bool is_phoncontainer() const;
767     bool implicitspace() const;
768     const std::string& text_delimiter() const;
769     bool auth() const;
770     bool xlink() const;
771     bool setonly() const;
772     bool auto_generate_id() const;
773 
doc()774     Document *doc() const { return _mydoc; };
775 
776 
777     std::vector<FoliaElement*> select( ElementType,
778 				       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const;
779     std::vector<FoliaElement*> select( ElementType,
780 				       const std::set<ElementType>& ,
781 				       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const;
782     std::vector<FoliaElement*> select( ElementType,
783 				       const std::string&,
784 				       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const;
785     std::vector<FoliaElement*> select( ElementType,
786 				       const std::string&,
787 				       const std::set<ElementType>& ,
788 				       SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const;
789 
790     void unravel( std::set<FoliaElement*>& );
791 
792   protected:
793     xmlNode *xml( bool, bool = false ) const;
794     void setAttributes( KWargs& );
795     KWargs collectAttributes() const;
796     xmlNs *foliaNs() const;
797     bool addable( const FoliaElement * ) const;
798 
799   private:
refcount()800     int refcount() const { return _refcount; };
increfcount()801     void increfcount() { ++_refcount; };
decrefcount()802     void decrefcount() { --_refcount; };
resetrefcount()803     void resetrefcount() { _refcount = 0; };
setAuth(bool b)804     void setAuth( bool b ){ _auth = b; };
805     void setDateTime( const std::string& );
806     const std::string getDateTime() const;
807     bool checkAtts();
808     void set_typegroup( KWargs& ) const;
809     bool acceptable( ElementType ) const;
810     UnicodeString text_container_text( const TextPolicy& ) const;
811     void check_text_consistency(bool = true) const;
812     void check_text_consistency_while_parsing( bool = true,
813 					       bool = false ); //can't we merge these two somehow?
814     void check_append_text_consistency( const FoliaElement * ) const;
815     void check_set_declaration();
816     void addFeatureNodes( const KWargs& args );
817     Document *_mydoc;
818     FoliaElement *_parent;
819     bool _auth;
820     bool _space;
821     AnnotatorType _annotator_type;
822     int _refcount;
823     double _confidence;
824     std::string _annotator;
825     std::string _n;
826     std::string _datetime;
827     std::string _begintime;
828     std::string _endtime;
829     std::string _speaker;
830     std::string _textclass;
831     std::string _metadata;
832     std::string _processor;
833     std::string _set;
834     std::string _class;
835     std::string _id;
836     std::string _src;
837     std::string _tags;
838     SPACE_FLAGS _preserve_spaces;
839     std::vector<FoliaElement*> _data;
840     const properties& _props;
841   };
842 
843   bool isSubClass( const ElementType e1, const ElementType e2 );
844 
845   bool isSubClass( const FoliaElement *e1, const FoliaElement *e2 );
846 
847   template <typename T1, typename T2>
isSubClass()848     bool isSubClass(){
849     /// templated check if Type T1 is a subclass of Type T2
850     /*!
851       \return true if T1 is a SubClass of T2.
852       This is about C++ class inheritance: is our class a derivative of c's
853       class?
854     */
855     return isSubClass( T1::PROPS.ELEMENT_ID, T2::PROPS.ELEMENT_ID );
856   }
857 
858   bool operator==( const FoliaElement&, const FoliaElement& );
859   inline bool operator!=( const FoliaElement& e1, const FoliaElement& e2 ){
860     return !( e1 == e2 );
861   }
862 
len(const FoliaElement * e)863   inline size_t len( const FoliaElement *e ) {
864     /// return the number of FoliaElement children of '\e e
865     return e->size();
866   }
867 
868   template <typename T>
len(const std::vector<T> & v)869     inline size_t len( const std::vector<T>& v ) {
870     /// return the size of the given vector
871     return v.size(); }
872 
873   inline const std::string str( const FoliaElement *e,
874 				const std::string& cls = "current" ) {
875     /// return the string value contained in \e e
876     /*!
877       \param e The FoliaElement
878       \param cls the textclass we want
879       \return the (UTF8) string value
880     */
881     return e->str( cls ); }
882 
str(const FoliaElement * e,const TextPolicy & tp)883   inline const std::string str( const FoliaElement *e,
884 				const TextPolicy& tp ){
885     /// return the string value contained in \e e
886     /*!
887       \param e The FoliaElement
888       \param tp the TextPolicy to use
889       \return the (UTF8) string value
890     */
891     return e->str( tp );
892   }
893 
text(const FoliaElement * e,const TextPolicy & tp)894   inline const UnicodeString text( const FoliaElement *e,
895 				   const TextPolicy& tp ){
896     /// return the Unicode value contained in \e e
897     /*!
898       \param e The FoliaElement
899       \param tp the TextPolicy to use
900       \return the Unicode string value
901     */
902     if ( e ){
903       return e->text( tp );
904     }
905     else {
906       throw ValueError( "text() for empty element" );
907     }
908   }
909 
910   inline const UnicodeString text( const FoliaElement *e,
911 				   const std::string& cls = "current",
912 				   bool debug = false ) {
913     /// return the Unicode value contained in \e e
914     /*!
915       \param e The FoliaElement
916       \param cls the textclass we want
917       \param debug enables debugging when true
918       \return the Unicode string value
919     */
920     if ( e ){
921       return e->text( cls, TEXT_FLAGS::NONE, debug );
922     }
923     else {
924       throw ValueError( "text() for empty element" );
925     }
926   }
927 
unicode(const FoliaElement * e)928   inline const UnicodeString unicode( const FoliaElement *e ) {
929     /// return the Unicode value contained in \e e
930     /*!
931       \param e The FoliaElement
932       \return the Unicode string value
933     */
934     return e->unicode(); }
935 
isinstance(const FoliaElement * e,ElementType t)936   inline bool isinstance( const FoliaElement *e, ElementType t ) {
937     /// return true when the first parameter is an instance of the type
938     /// given by the second parameter
939     /*!
940       \param e the FoliaElement to test
941       \param t the type to check against
942     */
943     return e->isinstance( t ); }
944 
945   class AllowGenerateID: public virtual FoliaElement {
946   public:
947     void setMaxId( FoliaElement * );
948     const std::string generateId( const std::string& tag );
949   private:
950     std::map<std::string, int> id_map;
951   };
952 
953   class AllowCorrections: public virtual FoliaElement {
954     /// Interface class that enables corrections on Elements
955   public:
956     Correction *correct( const std::vector<FoliaElement*>&,
957 			 const std::vector<FoliaElement*>&,
958  			 const std::vector<FoliaElement*>&,
959 			 const std::vector<FoliaElement*>&,
960 			 const KWargs& );
961     Correction *correct( FoliaElement*,
962 			 FoliaElement*,
963 			 const KWargs& );
964     Correction* correct( FoliaElement*,
965 			 FoliaElement*,
966 			 const std::vector<FoliaElement*>&,
967 			 const KWargs& );
968     Correction *correct( const std::string& = "" );
969   };
970 
971   class AllowXlink: public virtual FoliaElement {
972   public:
973     const std::string href() const;
974     void setAttributes( KWargs& );
975     KWargs collectAttributes() const;
976   protected:
977     std::map<std::string,std::string> _xlink;
978   };
979 
980   class AllowInlineAnnotation: public AllowCorrections {
981   public:
allowannotations()982     bool allowannotations() const { return true; };
983     template <typename F>
984       std::vector<F*> annotations( const std::string& s = "" ) const {
985       return FoliaElement::annotations<F>( s );
986     }
987 
988     template <typename F>
989       int has_annotation( const std::string& st = "" ) const {
990       return FoliaElement::has_annotation<F>(st);
991     }
992 
993     template <typename F>
994       F *annotation( const std::string& st = "" ) const {
995       return FoliaElement::annotation<F>(st);
996     }
997 
998     std::vector<Alternative *> alternatives( ElementType = BASE,
999 					     const std::string& = "" ) const;
1000 
1001 
1002     PosAnnotation *addPosAnnotation( const KWargs& );
1003     PosAnnotation *getPosAnnotations( const std::string&,
1004 				      std::vector<PosAnnotation*>& ) const;
1005     LemmaAnnotation *addLemmaAnnotation( const KWargs& );
1006     LemmaAnnotation *getLemmaAnnotations( const std::string&,
1007 					  std::vector<LemmaAnnotation*>& ) const;
1008 
1009   };
1010   std::string VersionName();
1011   std::string Version();
1012 
1013   UnicodeString trim_space( const UnicodeString& in );
1014   UnicodeString postprocess_spaces( const UnicodeString& in );
1015   std::string tagToAtt( const FoliaElement* );
1016   void destroy( FoliaElement *el );
1017 
1018 } // namespace folia
1019 
1020 #endif // FOLIA_IMPL_H
1021