1 /* 2 Copyright (c) 2006 - 2021 3 CLST - Radboud University 4 ILK - Tilburg University 5 6 This file is part of libfolia 7 8 libfolia is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 libfolia is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, see <http://www.gnu.org/licenses/>. 20 21 For questions and suggestions, see: 22 https://github.com/LanguageMachines/ticcutils/issues 23 or send mail to: 24 lamasoftware (at ) science.ru.nl 25 */ 26 27 #ifndef FOLIA_IMPL_H 28 #define FOLIA_IMPL_H 29 30 #include <set> 31 #include <map> 32 #include <vector> 33 #include <string> 34 #include <iostream> 35 #include <exception> 36 #include "unicode/unistr.h" 37 #include "libxml/tree.h" 38 39 #include "libfolia/folia_properties.h" 40 #include "libfolia/folia_metadata.h" 41 #include "libfolia/folia_textpolicy.h" 42 43 using namespace icu; 44 45 namespace folia { 46 class Document; 47 class AbstractSpanAnnotation; 48 class Alternative; 49 class PosAnnotation; 50 class LemmaAnnotation; 51 class SenseAnnotation; 52 class DomainAnnotation; 53 class MorphologyLayer; 54 class Morpheme; 55 class Phoneme; 56 class Sentence; 57 class Word; 58 class TextContent; 59 class PhonContent; 60 class Correction; 61 class New; 62 class Original; 63 class Current; 64 class Suggestion; 65 class Division; 66 class DependencyDependent; 67 class Paragraph; 68 class Morpheme; 69 class MetaData; 70 71 /// class used to steer 'select()' behaviour 72 enum class SELECT_FLAGS { 73 RECURSE=0, /*!< recurse the whole FoLia from the given node downwards 74 returning all matching nodes, even within matches. 75 This is the default. 76 */ 77 LOCAL=1, //!< only just look in the direct sibblings of the given node 78 TOP_HIT=2 //!< like recurse, but do NOT recurse into sibblings of matching nodes 79 }; 80 81 /// class used to steer 'xml:space' behaviour 82 enum class SPACE_FLAGS { 83 UNSET=-1, //!< not yet known 84 DEFAULT=0, //!< the default behaviour 85 PRESERVE=1 //!< spaces should be preserved 86 }; 87 88 89 #define NOT_IMPLEMENTED { \ 90 throw NotImplementedError( xmltag() + "::" + std::string(__func__) ); \ 91 } 92 93 class FoliaElement { 94 friend std::ostream& operator<<( std::ostream&, const FoliaElement& ); 95 friend std::ostream& operator<<( std::ostream&, const FoliaElement* ); 96 friend bool operator==( const FoliaElement&, const FoliaElement& ); 97 friend void destroy( FoliaElement * ); 98 protected: ~FoliaElement()99 virtual ~FoliaElement(){}; 100 public: 101 virtual void destroy() = 0; init()102 virtual void init() {}; 103 virtual size_t size() const = 0; 104 virtual FoliaElement* index( size_t ) const = 0; 105 virtual FoliaElement* rindex( size_t ) const = 0; 106 FoliaElement* operator[]( size_t i ) const { 107 return index(i); 108 } 109 110 virtual bool isinstance( ElementType et ) const = 0; 111 112 template <typename F> isinstance()113 bool isinstance() const { 114 return element_id() == F::PROPS.ELEMENT_ID; 115 } 116 117 template <typename T> add_child(KWargs & args)118 inline T *add_child( KWargs& args ){ 119 /// create a new FoliaElement of type T as child of this 120 /*! 121 \param args an attribute-value list of arguments to use 122 \return a new FoliaElement 123 may throw if a Document is required, but not available 124 */ 125 return new T( args, this ); 126 } 127 128 template <typename T> add_child()129 inline T *add_child(){ 130 /// create a new FoliaElement of type T as child of this 131 /*! 132 \return a new FoliaElement 133 may throw if a Document is required, but not available 134 */ 135 return new T( this ); 136 } 137 138 template <typename T> add_child(const std::string & txt)139 inline T *add_child( const std::string& txt ){ 140 /// create a new XmlText as child of 'this' 141 /*! 142 \param txt an value to be assigned as a "text" attribute 143 \return a new FoliaElement 144 this will not complie for any class that has NO IMPLEMENTATION for 145 setvalue(). (which ar most classes) 146 */ 147 T *result = new T(this); 148 result->setvalue( txt ); 149 return result; 150 } 151 152 bool isSubClass( ElementType ) const; isSubClass(const FoliaElement * c)153 bool isSubClass( const FoliaElement *c ) const { 154 /// check if the object is a subclass of the class of \e c 155 /*! 156 \param c the FoliaElement we would like to compare to 157 \return true if the object is a SubClass of c. 158 This is about C++ class inheritance: is our class a derivative of c's 159 class? 160 */ 161 return dynamic_cast<decltype(c)>(this) != 0; 162 }; 163 164 virtual void assignDoc( Document* ) = 0; 165 virtual FoliaElement *parent() const = 0; 166 virtual void set_parent( FoliaElement *p ) = 0; 167 virtual bool acceptable( ElementType ) const = 0; 168 virtual bool addable( const FoliaElement * ) const = 0; 169 virtual FoliaElement *append( FoliaElement* ) = 0; 170 virtual FoliaElement *postappend( ) = 0; 171 virtual void remove( FoliaElement * ) = 0; 172 virtual std::vector<FoliaElement*> find_replacables( FoliaElement * ) const = 0; 173 virtual void replace( FoliaElement * ) = 0; 174 virtual FoliaElement* replace( FoliaElement *, FoliaElement* ) = 0; 175 virtual void insert_after( FoliaElement *, FoliaElement * ) = 0; 176 virtual const std::vector<FoliaElement*>& data() const = 0; 177 virtual FoliaElement *head() const NOT_IMPLEMENTED; 178 179 // Sentences 180 virtual Sentence *addSentence( const KWargs& ) = 0; 181 Sentence *addSentence( const std::string& s ="" ){ 182 return addSentence( getArgs(s) ); 183 }; 184 185 // Selections 186 187 template <typename F> 188 std::vector<F*> select( const std::string& st, 189 const std::set<ElementType>& exclude, 190 bool recurse = true ) const { 191 std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID, 192 st, 193 exclude, 194 (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) ); 195 std::vector<F*> res; 196 for ( size_t i = 0; i < tmp.size(); ++i ){ 197 res.push_back( dynamic_cast<F*>( tmp[i]) ); 198 } 199 return res; 200 } 201 202 template <typename F> 203 std::vector<F*> select( const std::string& st, 204 bool recurse = true ) const { 205 std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID, 206 st, 207 (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) ); 208 std::vector<F*> res; 209 for ( size_t i = 0; i < tmp.size(); ++i ){ 210 res.push_back( dynamic_cast<F*>( tmp[i]) ); 211 } 212 return res; 213 } 214 215 template <typename F> 216 std::vector<F*> select( const char* st, 217 bool recurse = true ) const { 218 std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID, 219 std::string(st), 220 (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) ); 221 std::vector<F*> res; 222 for ( size_t i = 0; i < tmp.size(); ++i ){ 223 res.push_back( dynamic_cast<F*>( tmp[i]) ); 224 } 225 return res; 226 } 227 228 template <typename F> 229 std::vector<F*> select( const std::set<ElementType>& exclude, 230 bool recurse = true ) const { 231 std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID, 232 exclude, 233 (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) ); 234 std::vector<F*> res; 235 for ( size_t i = 0; i < tmp.size(); ++i ){ 236 res.push_back( dynamic_cast<F*>( tmp[i]) ); 237 } 238 return res; 239 } 240 241 template <typename F> 242 std::vector<F*> select( bool recurse = true ) const { 243 std::vector<FoliaElement*> tmp = select( F::PROPS.ELEMENT_ID, 244 (recurse?SELECT_FLAGS::RECURSE : SELECT_FLAGS::LOCAL) ); 245 std::vector<F*> res; 246 for ( size_t i = 0; i < tmp.size(); ++i ){ 247 res.push_back( dynamic_cast<F*>( tmp[i]) ); 248 } 249 return res; 250 } 251 252 // annotations 253 allowannotations()254 virtual bool allowannotations() const { return false; }; 255 virtual const std::string annotator( ) const = 0; 256 virtual void annotator( const std::string& ) = 0; 257 virtual AnnotatorType annotatortype() const = 0; 258 virtual const std::string processor() const = 0; 259 virtual void processor( const std::string& ) = 0; 260 virtual void annotatortype( AnnotatorType t ) = 0; 261 virtual AnnotationType annotation_type() const = 0; 262 virtual PosAnnotation *addPosAnnotation( const KWargs& ) NOT_IMPLEMENTED; 263 virtual LemmaAnnotation *addLemmaAnnotation( const KWargs& ) NOT_IMPLEMENTED; 264 virtual MorphologyLayer *addMorphologyLayer( const KWargs& ) NOT_IMPLEMENTED; 265 266 virtual PosAnnotation *getPosAnnotations( const std::string&, 267 std::vector<PosAnnotation*>& ) const NOT_IMPLEMENTED; 268 virtual LemmaAnnotation *getLemmaAnnotations( const std::string&, 269 std::vector<LemmaAnnotation*>& ) const NOT_IMPLEMENTED; 270 virtual MorphologyLayer *getMorphologyLayers( const std::string&, 271 std::vector<MorphologyLayer*>& ) const NOT_IMPLEMENTED; 272 273 virtual const MetaData *get_metadata() const = 0; 274 virtual const std::string get_metadata( const std::string& ) const = 0; 275 276 template <typename F> 277 std::vector<F*> annotations( const std::string& s = "" ) const { 278 if ( allowannotations() ){ 279 return select<F>( s, default_ignore_annotations ); 280 } 281 else NOT_IMPLEMENTED; 282 } 283 284 template <typename F> 285 bool has_annotation( const std::string& st = "" ) const { 286 std::vector<F*> v = annotations<F>( st ); 287 return v.size() > 0; 288 } 289 290 template <typename F> 291 F *annotation( const std::string& st = "" ) const { 292 std::vector<F*>v = annotations<F>( st ); 293 if ( v.size() > 0 ){ 294 return v[0]; 295 } 296 else { 297 return 0; 298 } 299 } 300 301 template <typename F> addAnnotation(const KWargs & args)302 F *addAnnotation( const KWargs& args ) { 303 F *res = 0; 304 try { 305 res = new F( args, doc() ); 306 } 307 catch( std::exception& ){ 308 if ( res ){ 309 res->destroy(); 310 } 311 throw; 312 } 313 append( res ); 314 return res; 315 } 316 // span annotation 317 virtual std::vector<AbstractSpanAnnotation*> selectSpan() const = 0; 318 virtual std::vector<AbstractSpanAnnotation*> findspans( ElementType, 319 const std::string& = "" ) const NOT_IMPLEMENTED; 320 template <typename F> 321 std::vector<AbstractSpanAnnotation*> findspans( const std::string& st = "" ) const { 322 return findspans( F::PROPS.ELEMENT_ID, st ); 323 } 324 virtual AbstractSpanAnnotation *findspan( const std::vector<FoliaElement*>& ) const NOT_IMPLEMENTED; 325 326 // features 327 virtual std::vector<std::string> feats( const std::string& ) const = 0; 328 virtual const std::string feat( const std::string& ) const = 0; 329 330 //XML (de)serialisation 331 virtual FoliaElement* parseXml( const xmlNode * ) = 0; 332 const std::string xmlstring( bool=true ) const; // serialize to a string (XML fragment) 333 const std::string xmlstring( bool, int=0, bool=true ) const; // serialize to a string (XML fragment) 334 virtual xmlNode *xml( bool, bool = false ) const = 0; //serialize to XML 335 336 // text/string content 337 bool hastext( const std::string& = "current" ) const; 338 bool hasphon( const std::string& = "current" ) const; 339 virtual void check_text_consistency(bool = true) const = 0; 340 virtual void check_text_consistency_while_parsing( bool = true, 341 bool = false ) = 0; //can't we merge these two somehow? 342 virtual void check_append_text_consistency( const FoliaElement * ) const = 0; 343 344 virtual const std::string str( const std::string& = "current" ) const = 0; 345 virtual const std::string str( const TextPolicy& ) const = 0; 346 347 const UnicodeString unicode( const std::string& cls = "current", 348 bool debug=false ) const { 349 return text( cls, TEXT_FLAGS::NONE, debug ); }; 350 351 virtual UnicodeString text_container_text( const TextPolicy& ) const = 0; 352 virtual const UnicodeString private_text( const TextPolicy& ) const = 0; 353 virtual const UnicodeString text( const TextPolicy & ) const = 0; 354 virtual const UnicodeString text( const std::string&, 355 TEXT_FLAGS = TEXT_FLAGS::NONE, 356 bool = false ) const = 0; 357 virtual const UnicodeString text( TEXT_FLAGS = TEXT_FLAGS::NONE, 358 bool = false ) const = 0; 359 const UnicodeString stricttext( const std::string& = "current" ) const; 360 const UnicodeString toktext( const std::string& = "current" ) const; 361 virtual const UnicodeString phon( const TextPolicy& ) const = 0; 362 virtual const UnicodeString phon( const std::string&, 363 TEXT_FLAGS = TEXT_FLAGS::NONE ) const = 0; 364 virtual const UnicodeString phon( TEXT_FLAGS = TEXT_FLAGS::NONE ) const = 0; 365 virtual bool printable() const = 0; 366 virtual bool speakable() const = 0; 367 virtual bool referable() const = 0; 368 virtual bool is_textcontainer() const = 0; 369 virtual bool is_phoncontainer() const = 0; 370 virtual bool implicitspace() const = 0; 371 virtual const std::string& text_delimiter() const = 0; 372 // Word 373 virtual Word *previous() const NOT_IMPLEMENTED; 374 virtual Word *next() const NOT_IMPLEMENTED; 375 virtual const Word* resolveword( const std::string& ) const = 0; 376 virtual std::vector<Word*> context( size_t, 377 const std::string& ="" ) const NOT_IMPLEMENTED; 378 virtual std::vector<Word*> leftcontext( size_t, 379 const std::string& ="" ) const NOT_IMPLEMENTED; 380 virtual std::vector<Word*> rightcontext( size_t, 381 const std::string& ="" ) const NOT_IMPLEMENTED; 382 virtual Word *addWord( const KWargs& ) = 0; 383 virtual Word *addWord( const std::string& ="" ) = 0; 384 385 // corrections 386 virtual New *getNew() const NOT_IMPLEMENTED; 387 virtual FoliaElement *getNew( size_t ) const NOT_IMPLEMENTED; 388 virtual Original *getOriginal() const NOT_IMPLEMENTED; 389 virtual FoliaElement *getOriginal( size_t ) const NOT_IMPLEMENTED; 390 virtual Current *getCurrent() const NOT_IMPLEMENTED; 391 virtual FoliaElement *getCurrent( size_t ) const NOT_IMPLEMENTED; 392 virtual Correction *incorrection() const NOT_IMPLEMENTED; 393 virtual Correction *split( FoliaElement *, FoliaElement *, 394 const std::string& = "" ) NOT_IMPLEMENTED; 395 396 virtual Correction *mergewords( FoliaElement *, 397 const std::vector<FoliaElement *>&, 398 const std::string& = "" ) NOT_IMPLEMENTED; 399 400 virtual Correction *deleteword( FoliaElement *, 401 const std::string& = "" ) NOT_IMPLEMENTED; 402 virtual Correction *insertword( FoliaElement *, FoliaElement *, 403 const std::string& = "" ) NOT_IMPLEMENTED; 404 virtual std::vector<Suggestion*> suggestions() const NOT_IMPLEMENTED; 405 virtual Suggestion *suggestions( size_t ) const NOT_IMPLEMENTED; 406 407 virtual Correction *correct( const std::vector<FoliaElement*>&, 408 const std::vector<FoliaElement*>&, 409 const std::vector<FoliaElement*>&, 410 const std::vector<FoliaElement*>&, 411 const KWargs& ) NOT_IMPLEMENTED; 412 virtual Correction* correct( FoliaElement*, 413 FoliaElement*, 414 const KWargs& ) NOT_IMPLEMENTED; 415 virtual Correction* correct( FoliaElement*, 416 FoliaElement*, 417 const std::vector<FoliaElement*>&, 418 const KWargs& ) NOT_IMPLEMENTED; 419 virtual Correction *correct( const std::string& = "" ) NOT_IMPLEMENTED; 420 421 // TextContent 422 virtual const TextContent *text_content( const TextPolicy& ) const = 0; 423 virtual const TextContent *text_content( const std::string& = "current", 424 bool debug = false ) const = 0; 425 TextContent *settext( const std::string&, 426 const std::string& = "current" ); 427 TextContent *settext( const std::string&, 428 int, 429 const std::string& = "current" ); 430 TextContent *setutext( const UnicodeString&, 431 const std::string& = "current" ); 432 TextContent *setutext( const UnicodeString&, 433 int , 434 const std::string& = "current" ); 435 virtual int offset() const NOT_IMPLEMENTED; 436 virtual void set_offset( int ) const NOT_IMPLEMENTED; 437 438 void clear_textcontent( const std::string& = "current" ); 439 // PhonContent 440 virtual const PhonContent *phon_content( const TextPolicy& ) const = 0; 441 virtual const PhonContent *phon_content( const std::string& = "current", 442 bool debug=false ) const = 0; 443 444 // properties 445 virtual const std::string& get_delimiter( const TextPolicy& ) const = 0; 446 virtual void setDateTime( const std::string& ) = 0; 447 virtual const std::string getDateTime() const = 0; 448 virtual const std::string pos( const std::string& = "" ) const NOT_IMPLEMENTED; 449 virtual const std::string lemma( const std::string& = "" ) const NOT_IMPLEMENTED; 450 virtual const std::string cls() const = 0; 451 virtual void set_cls( const std::string& ) = 0; 452 virtual const std::string sett() const = 0; 453 virtual void set_set( const std::string& ) = 0; 454 virtual const std::string n() const = 0; 455 virtual void set_n( const std::string& ) = 0; 456 virtual const std::string tag() const = 0; 457 virtual const std::string set_tag( const std::string& ) = 0; 458 virtual const std::string id() const = 0; 459 virtual const std::string begintime() const = 0; 460 virtual void set_begintime( const std::string& ) = 0; 461 virtual const std::string endtime() const = 0; 462 virtual void set_endtime( const std::string& ) = 0; 463 virtual const std::string speech_src() const = 0; 464 virtual void set_speech_src( const std::string& ) = 0; 465 virtual const std::string speech_speaker() const = 0; 466 virtual void set_speech_speaker( const std::string& ) = 0; 467 virtual const std::string language( const std::string& = "" ) const = 0; 468 virtual const std::string set_to_current() NOT_IMPLEMENTED; 469 virtual double confidence() const = 0; 470 virtual void set_confidence( double ) = 0; 471 virtual void confidence( double ) = 0; // deprecated 472 virtual bool space() const = 0; 473 virtual bool set_space( bool ) = 0; 474 virtual SPACE_FLAGS spaces_flag() const = 0; 475 virtual void set_spaces_flag( SPACE_FLAGS ) = 0; 476 virtual ElementType element_id() const = 0; 477 virtual size_t occurrences() const = 0; 478 virtual size_t occurrences_per_set() const = 0; 479 virtual Attrib required_attributes() const = 0; 480 virtual Attrib optional_attributes() const = 0; 481 virtual const std::string& xmltag() const = 0; classname()482 const std::string& classname() const { return xmltag(); }; //synomym 483 virtual const std::string& default_subset() const = 0; 484 virtual const std::string subset() const NOT_IMPLEMENTED; 485 virtual bool setonly() const = 0; 486 virtual bool auto_generate_id() const = 0; 487 virtual Document *doc() const = 0; 488 virtual Sentence *sentence() const NOT_IMPLEMENTED; 489 virtual Paragraph *paragraph() const NOT_IMPLEMENTED; 490 virtual Division *division() const NOT_IMPLEMENTED; 491 virtual std::vector<Paragraph*> paragraphs() const NOT_IMPLEMENTED; 492 virtual std::vector<Sentence*> sentences() const NOT_IMPLEMENTED; 493 virtual std::vector<Word*> words( const std::string& ="" ) const NOT_IMPLEMENTED; 494 virtual std::vector<FoliaElement*> wrefs() const NOT_IMPLEMENTED; 495 virtual FoliaElement* wrefs( size_t ) const NOT_IMPLEMENTED; 496 497 virtual std::vector<Morpheme*> morphemes( const std::string& ="" ) const NOT_IMPLEMENTED; 498 virtual Morpheme* morpheme( size_t, const std::string& ="" ) const NOT_IMPLEMENTED; 499 virtual Sentence *sentences( size_t ) const NOT_IMPLEMENTED; 500 virtual Sentence *rsentences( size_t ) const NOT_IMPLEMENTED; 501 virtual Paragraph *paragraphs( size_t ) const NOT_IMPLEMENTED; 502 virtual Paragraph *rparagraphs( size_t ) const NOT_IMPLEMENTED; 503 virtual Word *words( size_t, const std::string& ="" ) const NOT_IMPLEMENTED; 504 virtual std::vector<Word *> wordParts() const NOT_IMPLEMENTED; 505 virtual Word *rwords( size_t, const std::string& ="" ) const NOT_IMPLEMENTED; 506 507 virtual DependencyDependent *dependent() const NOT_IMPLEMENTED; 508 509 virtual const std::string description() const; 510 511 // alternatives 512 virtual std::vector<Alternative *> alternatives( ElementType, 513 const std::string& = "" 514 ) const NOT_IMPLEMENTED; 515 std::vector<Alternative*> alternatives( const std::string& s = "" ) const { 516 return alternatives( BASE, s ); 517 } 518 519 virtual const std::string content() const NOT_IMPLEMENTED; 520 virtual const std::string src() const NOT_IMPLEMENTED; 521 virtual const UnicodeString caption() const NOT_IMPLEMENTED; 522 virtual std::vector<FoliaElement *> resolve() const NOT_IMPLEMENTED; 523 virtual const FoliaElement* resolveid() const NOT_IMPLEMENTED; 524 virtual bool checkAtts() = 0; 525 virtual const UnicodeString deeptext( const TextPolicy& ) const NOT_IMPLEMENTED; 526 virtual const UnicodeString deepphon( const TextPolicy& ) const NOT_IMPLEMENTED; 527 528 529 virtual std::vector<FoliaElement*> select( ElementType, 530 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0; 531 virtual std::vector<FoliaElement*> select( ElementType, 532 const std::set<ElementType>& , 533 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0; 534 virtual std::vector<FoliaElement*> select( ElementType, 535 const std::string&, 536 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0; 537 virtual std::vector<FoliaElement*> select( ElementType, 538 const std::string&, 539 const std::set<ElementType>& , 540 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const = 0; 541 // some 'internal stuff 542 virtual int refcount() const = 0; 543 virtual void increfcount() = 0; 544 virtual void decrefcount() = 0; 545 virtual void resetrefcount() = 0; 546 virtual void setAttributes( KWargs& ) = 0; 547 virtual KWargs collectAttributes() const = 0; 548 virtual void setAuth( bool b ) = 0; 549 virtual bool auth( ) const = 0; 550 virtual bool xlink() const = 0; 551 virtual const std::string href() const NOT_IMPLEMENTED; 552 virtual const std::string generateId( const std::string& ) NOT_IMPLEMENTED; 553 virtual const std::string textclass() const NOT_IMPLEMENTED; 554 virtual void unravel( std::set<FoliaElement*>& ) NOT_IMPLEMENTED; 555 static FoliaElement *private_createElement( ElementType ); 556 public: 557 static FoliaElement *createElement( ElementType, Document * =0 ); 558 static FoliaElement *createElement( const std::string&, Document * =0 ); 559 560 }; 561 562 class AbstractElement: public virtual FoliaElement { 563 friend void destroy( FoliaElement * ); 564 private: 565 //Constructor 566 AbstractElement( const AbstractElement& ); // inhibit copies 567 AbstractElement& operator=( const AbstractElement& ); // inhibit copies 568 protected: 569 AbstractElement( const properties& p, Document* = 0 ); 570 AbstractElement( const properties& p, FoliaElement * ); 571 virtual ~AbstractElement(); 572 public: 573 void destroy(); 574 void classInit(); 575 void classInit( const KWargs& ); 576 577 //functions regarding contained data size()578 size_t size() const { return _data.size(); }; 579 FoliaElement* index( size_t ) const; 580 FoliaElement* rindex( size_t ) const; 581 isinstance(ElementType et)582 bool isinstance( ElementType et ) const { 583 /// return true when the object is an instance of the type parameter 584 /*! 585 \param et the type to check against 586 */ 587 return et == element_id(); 588 } 589 590 void assignDoc( Document* ); parent()591 FoliaElement *parent() const { return _parent; }; set_parent(FoliaElement * p)592 void set_parent( FoliaElement *p ) { _parent = p ; }; 593 594 // modify the internal data 595 FoliaElement *append( FoliaElement* ); 596 FoliaElement *postappend( ); 597 void remove( FoliaElement * ); 598 std::vector<FoliaElement*> find_replacables( FoliaElement * ) const; 599 void replace( FoliaElement * ); 600 FoliaElement* replace( FoliaElement *, FoliaElement* ); 601 void insert_after( FoliaElement *, FoliaElement * ); data()602 const std::vector<FoliaElement*>& data() const { return _data; }; 603 604 // Sentences 605 Sentence *addSentence( const KWargs& ); 606 607 // MetaData 608 const MetaData *get_metadata() const; 609 const std::string get_metadata( const std::string& ) const; 610 611 // Selections 612 template <typename F> 613 std::vector<F*> select( bool recurse = true ) const { 614 return FoliaElement::select<F>(recurse); 615 } 616 617 template <typename F> 618 std::vector<F*> select( const std::string& st, 619 const std::set<ElementType>& exclude, 620 bool recurse = true ) const { 621 return FoliaElement::select<F>( st, exclude, recurse ); 622 } 623 624 template <typename F> 625 std::vector<F*> select( const std::string& st, 626 bool recurse = true ) const { 627 return FoliaElement::select<F>( st, recurse ); 628 } 629 630 template <typename F> 631 std::vector<F*> select( const char* st, 632 bool recurse = true ) const { 633 return FoliaElement::select<F>( st, recurse ); 634 } 635 636 template <typename F> 637 std::vector<F*> select( const std::set<ElementType>& exclude, 638 bool recurse = true ) const { 639 return FoliaElement::select<F>( exclude, recurse ); 640 } 641 annotator()642 const std::string annotator( ) const { return _annotator; }; annotator(const std::string & a)643 void annotator( const std::string& a ) { _annotator = a; }; processor()644 const std::string processor( ) const { return _processor; }; processor(const std::string & p)645 void processor( const std::string& p ) { _processor = p; }; annotatortype()646 AnnotatorType annotatortype() const { return _annotator_type; }; annotatortype(AnnotatorType t)647 void annotatortype( AnnotatorType t ) { _annotator_type = t; }; 648 649 template <typename F> addAnnotation(const KWargs & args)650 F *addAnnotation( const KWargs& args ) { 651 return FoliaElement::addAnnotation<F>( args ); 652 } 653 654 // Span annotations 655 std::vector<AbstractSpanAnnotation*> selectSpan() const; 656 657 // features 658 std::vector<std::string> feats( const std::string& ) const; 659 const std::string feat( const std::string& ) const; 660 661 //XML parsing 662 FoliaElement* parseXml( const xmlNode * ); 663 664 // text/string content 665 666 const std::string str( const std::string& = "current" ) const; 667 const std::string str( const TextPolicy& ) const; 668 669 const UnicodeString private_text( const TextPolicy& ) const; 670 const UnicodeString text( const TextPolicy & ) const; 671 const UnicodeString text( const std::string&, 672 TEXT_FLAGS = TEXT_FLAGS::NONE, 673 bool = false ) const; 674 const UnicodeString text( TEXT_FLAGS flags = TEXT_FLAGS::NONE, 675 bool debug = false ) const { 676 return text( "current", flags, debug ); 677 } 678 679 const UnicodeString phon( const TextPolicy& ) const; 680 const UnicodeString phon( const std::string&, 681 TEXT_FLAGS = TEXT_FLAGS::NONE ) const; 682 const UnicodeString phon( TEXT_FLAGS flags = TEXT_FLAGS::NONE ) const { 683 return phon( "current", flags ); 684 } 685 686 const UnicodeString deeptext( const TextPolicy& ) const; 687 const UnicodeString deepphon( const TextPolicy& ) const; 688 689 // Word resolveword(const std::string &)690 const Word* resolveword( const std::string& ) const { return 0; }; 691 Word *addWord( const KWargs& ); 692 Word *addWord( const std::string& ="" ); 693 // TextContent 694 const TextContent *text_content( const TextPolicy& ) const; 695 const TextContent *text_content( const std::string& = "current", 696 bool = false ) const; 697 // PhonContent 698 const PhonContent *phon_content( const TextPolicy& tp ) const; 699 const PhonContent *phon_content( const std::string& = "current", 700 bool = false ) const; 701 702 // properties 703 const std::string& get_delimiter( const TextPolicy& ) const; 704 705 // attributes cls()706 const std::string cls() const { return _class; }; set_cls(const std::string & cls)707 void set_cls( const std::string& cls ){ _class = cls; }; update_cls(const std::string & c)708 void update_cls( const std::string& c ) { set_cls( c ); } // deprecated 709 sett()710 const std::string sett() const { return _set; }; set_set(const std::string & st)711 void set_set( const std::string& st ){ _set = st; }; 712 tag()713 const std::string tag() const { return _tags; }; 714 const std::string set_tag( const std::string& ); settag(const std::string & t)715 const std::string settag( const std::string& t ){ 716 return set_tag(t); }; //deprecated 717 n()718 const std::string n() const { return _n; }; set_n(const std::string & n)719 void set_n( const std::string& n ){ _n = n; }; 720 id()721 const std::string id() const { return _id; }; 722 begintime()723 const std::string begintime() const { return _begintime; }; set_begintime(const std::string & bt)724 void set_begintime( const std::string& bt ){ _begintime = bt; }; 725 endtime()726 const std::string endtime() const { return _endtime; }; set_endtime(const std::string & bt)727 void set_endtime( const std::string& bt ){ _endtime = bt; }; 728 textclass()729 const std::string textclass() const { return _textclass; }; textclass(const std::string & tc)730 void textclass( const std::string& tc ){ _textclass = tc; }; 731 732 const std::string speech_src() const; 733 void set_speech_src( const std::string& ) NOT_IMPLEMENTED; 734 735 const std::string speech_speaker() const; 736 void set_speech_speaker( const std::string& ) NOT_IMPLEMENTED; 737 space()738 bool space() const { return _space; }; set_space(bool b)739 bool set_space( bool b ) { bool s =_space; _space = b; return s; }; 740 spaces_flag()741 SPACE_FLAGS spaces_flag() const { return _preserve_spaces; }; set_spaces_flag(SPACE_FLAGS f)742 void set_spaces_flag( SPACE_FLAGS f ) { _preserve_spaces = f; }; 743 confidence()744 double confidence() const { return _confidence; }; confidence(double d)745 void confidence( double d ) { _confidence = d; }; set_confidence(double d)746 void set_confidence( double d ) { _confidence = d; }; 747 748 const std::string language( const std::string& = "" ) const; src()749 const std::string src() const { return _src; }; 750 // generic properties 751 ElementType element_id() const; 752 size_t occurrences() const; 753 size_t occurrences_per_set() const; 754 Attrib required_attributes() const; 755 Attrib optional_attributes() const; 756 bool hidden() const; 757 const std::string& xmltag() const; 758 const std::string& default_subset() const; 759 AnnotationType annotation_type() const; 760 const std::set<ElementType>& accepted_data() const; 761 const std::set<ElementType>& required_data() const; 762 bool printable() const; 763 bool speakable() const; 764 bool referable() const; 765 bool is_textcontainer() const; 766 bool is_phoncontainer() const; 767 bool implicitspace() const; 768 const std::string& text_delimiter() const; 769 bool auth() const; 770 bool xlink() const; 771 bool setonly() const; 772 bool auto_generate_id() const; 773 doc()774 Document *doc() const { return _mydoc; }; 775 776 777 std::vector<FoliaElement*> select( ElementType, 778 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const; 779 std::vector<FoliaElement*> select( ElementType, 780 const std::set<ElementType>& , 781 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const; 782 std::vector<FoliaElement*> select( ElementType, 783 const std::string&, 784 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const; 785 std::vector<FoliaElement*> select( ElementType, 786 const std::string&, 787 const std::set<ElementType>& , 788 SELECT_FLAGS = SELECT_FLAGS::RECURSE ) const; 789 790 void unravel( std::set<FoliaElement*>& ); 791 792 protected: 793 xmlNode *xml( bool, bool = false ) const; 794 void setAttributes( KWargs& ); 795 KWargs collectAttributes() const; 796 xmlNs *foliaNs() const; 797 bool addable( const FoliaElement * ) const; 798 799 private: refcount()800 int refcount() const { return _refcount; }; increfcount()801 void increfcount() { ++_refcount; }; decrefcount()802 void decrefcount() { --_refcount; }; resetrefcount()803 void resetrefcount() { _refcount = 0; }; setAuth(bool b)804 void setAuth( bool b ){ _auth = b; }; 805 void setDateTime( const std::string& ); 806 const std::string getDateTime() const; 807 bool checkAtts(); 808 void set_typegroup( KWargs& ) const; 809 bool acceptable( ElementType ) const; 810 UnicodeString text_container_text( const TextPolicy& ) const; 811 void check_text_consistency(bool = true) const; 812 void check_text_consistency_while_parsing( bool = true, 813 bool = false ); //can't we merge these two somehow? 814 void check_append_text_consistency( const FoliaElement * ) const; 815 void check_set_declaration(); 816 void addFeatureNodes( const KWargs& args ); 817 Document *_mydoc; 818 FoliaElement *_parent; 819 bool _auth; 820 bool _space; 821 AnnotatorType _annotator_type; 822 int _refcount; 823 double _confidence; 824 std::string _annotator; 825 std::string _n; 826 std::string _datetime; 827 std::string _begintime; 828 std::string _endtime; 829 std::string _speaker; 830 std::string _textclass; 831 std::string _metadata; 832 std::string _processor; 833 std::string _set; 834 std::string _class; 835 std::string _id; 836 std::string _src; 837 std::string _tags; 838 SPACE_FLAGS _preserve_spaces; 839 std::vector<FoliaElement*> _data; 840 const properties& _props; 841 }; 842 843 bool isSubClass( const ElementType e1, const ElementType e2 ); 844 845 bool isSubClass( const FoliaElement *e1, const FoliaElement *e2 ); 846 847 template <typename T1, typename T2> isSubClass()848 bool isSubClass(){ 849 /// templated check if Type T1 is a subclass of Type T2 850 /*! 851 \return true if T1 is a SubClass of T2. 852 This is about C++ class inheritance: is our class a derivative of c's 853 class? 854 */ 855 return isSubClass( T1::PROPS.ELEMENT_ID, T2::PROPS.ELEMENT_ID ); 856 } 857 858 bool operator==( const FoliaElement&, const FoliaElement& ); 859 inline bool operator!=( const FoliaElement& e1, const FoliaElement& e2 ){ 860 return !( e1 == e2 ); 861 } 862 len(const FoliaElement * e)863 inline size_t len( const FoliaElement *e ) { 864 /// return the number of FoliaElement children of '\e e 865 return e->size(); 866 } 867 868 template <typename T> len(const std::vector<T> & v)869 inline size_t len( const std::vector<T>& v ) { 870 /// return the size of the given vector 871 return v.size(); } 872 873 inline const std::string str( const FoliaElement *e, 874 const std::string& cls = "current" ) { 875 /// return the string value contained in \e e 876 /*! 877 \param e The FoliaElement 878 \param cls the textclass we want 879 \return the (UTF8) string value 880 */ 881 return e->str( cls ); } 882 str(const FoliaElement * e,const TextPolicy & tp)883 inline const std::string str( const FoliaElement *e, 884 const TextPolicy& tp ){ 885 /// return the string value contained in \e e 886 /*! 887 \param e The FoliaElement 888 \param tp the TextPolicy to use 889 \return the (UTF8) string value 890 */ 891 return e->str( tp ); 892 } 893 text(const FoliaElement * e,const TextPolicy & tp)894 inline const UnicodeString text( const FoliaElement *e, 895 const TextPolicy& tp ){ 896 /// return the Unicode value contained in \e e 897 /*! 898 \param e The FoliaElement 899 \param tp the TextPolicy to use 900 \return the Unicode string value 901 */ 902 if ( e ){ 903 return e->text( tp ); 904 } 905 else { 906 throw ValueError( "text() for empty element" ); 907 } 908 } 909 910 inline const UnicodeString text( const FoliaElement *e, 911 const std::string& cls = "current", 912 bool debug = false ) { 913 /// return the Unicode value contained in \e e 914 /*! 915 \param e The FoliaElement 916 \param cls the textclass we want 917 \param debug enables debugging when true 918 \return the Unicode string value 919 */ 920 if ( e ){ 921 return e->text( cls, TEXT_FLAGS::NONE, debug ); 922 } 923 else { 924 throw ValueError( "text() for empty element" ); 925 } 926 } 927 unicode(const FoliaElement * e)928 inline const UnicodeString unicode( const FoliaElement *e ) { 929 /// return the Unicode value contained in \e e 930 /*! 931 \param e The FoliaElement 932 \return the Unicode string value 933 */ 934 return e->unicode(); } 935 isinstance(const FoliaElement * e,ElementType t)936 inline bool isinstance( const FoliaElement *e, ElementType t ) { 937 /// return true when the first parameter is an instance of the type 938 /// given by the second parameter 939 /*! 940 \param e the FoliaElement to test 941 \param t the type to check against 942 */ 943 return e->isinstance( t ); } 944 945 class AllowGenerateID: public virtual FoliaElement { 946 public: 947 void setMaxId( FoliaElement * ); 948 const std::string generateId( const std::string& tag ); 949 private: 950 std::map<std::string, int> id_map; 951 }; 952 953 class AllowCorrections: public virtual FoliaElement { 954 /// Interface class that enables corrections on Elements 955 public: 956 Correction *correct( const std::vector<FoliaElement*>&, 957 const std::vector<FoliaElement*>&, 958 const std::vector<FoliaElement*>&, 959 const std::vector<FoliaElement*>&, 960 const KWargs& ); 961 Correction *correct( FoliaElement*, 962 FoliaElement*, 963 const KWargs& ); 964 Correction* correct( FoliaElement*, 965 FoliaElement*, 966 const std::vector<FoliaElement*>&, 967 const KWargs& ); 968 Correction *correct( const std::string& = "" ); 969 }; 970 971 class AllowXlink: public virtual FoliaElement { 972 public: 973 const std::string href() const; 974 void setAttributes( KWargs& ); 975 KWargs collectAttributes() const; 976 protected: 977 std::map<std::string,std::string> _xlink; 978 }; 979 980 class AllowInlineAnnotation: public AllowCorrections { 981 public: allowannotations()982 bool allowannotations() const { return true; }; 983 template <typename F> 984 std::vector<F*> annotations( const std::string& s = "" ) const { 985 return FoliaElement::annotations<F>( s ); 986 } 987 988 template <typename F> 989 int has_annotation( const std::string& st = "" ) const { 990 return FoliaElement::has_annotation<F>(st); 991 } 992 993 template <typename F> 994 F *annotation( const std::string& st = "" ) const { 995 return FoliaElement::annotation<F>(st); 996 } 997 998 std::vector<Alternative *> alternatives( ElementType = BASE, 999 const std::string& = "" ) const; 1000 1001 1002 PosAnnotation *addPosAnnotation( const KWargs& ); 1003 PosAnnotation *getPosAnnotations( const std::string&, 1004 std::vector<PosAnnotation*>& ) const; 1005 LemmaAnnotation *addLemmaAnnotation( const KWargs& ); 1006 LemmaAnnotation *getLemmaAnnotations( const std::string&, 1007 std::vector<LemmaAnnotation*>& ) const; 1008 1009 }; 1010 std::string VersionName(); 1011 std::string Version(); 1012 1013 UnicodeString trim_space( const UnicodeString& in ); 1014 UnicodeString postprocess_spaces( const UnicodeString& in ); 1015 std::string tagToAtt( const FoliaElement* ); 1016 void destroy( FoliaElement *el ); 1017 1018 } // namespace folia 1019 1020 #endif // FOLIA_IMPL_H 1021