1 /* 2 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License as 6 * published by the Free Software Foundation; either version 2 of the 7 * License, or (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 #ifndef __TAGGERWORD_H 18 #define __TAGGERWORD_H 19 20 #include <iostream> 21 #include <map> 22 #include <set> 23 #include <string> 24 #include <vector> 25 26 #include <lttoolbox/ltstr.h> 27 #include <apertium/ttag.h> 28 #include <apertium/apertium_re.h> 29 30 using namespace std; 31 32 /** Class TaggerWord. 33 * It stores the superficial form and all possible tags that it can receive. 34 * It has the fine tags delivered by the morphological analyzer and the coarse 35 * ones used by the PoS tagger. 36 */ 37 class TaggerWord{ 38 private: 39 wstring superficial_form; 40 41 set<TTag> tags; //Set of all possible tags 42 map<TTag, wstring> lexical_forms; //For a given coarse tag it stores the fine tag 43 //delevered by the morphological analyzer 44 wstring ignored_string; 45 46 bool plus_cut; //Flag to distinguish the way in which the word was ended. 47 //If it was done by '$' its value should be false 48 //If it was done by '+' its value should be true 49 bool previous_plus_cut; //Flag to distinguish the way in which the 50 //previous word was ended. It has the same 51 //plus_cut meaning 52 bool show_sf; // Show the superficial form in the output 53 static map<wstring, ApertiumRE, Ltstr> patterns; 54 55 bool match(wstring const &s, wstring const &pattern); 56 public: 57 static bool generate_marks; 58 static vector<wstring> array_tags; 59 60 static bool show_ignored_string; 61 62 /** 63 * Constructor 64 */ 65 TaggerWord(bool prev_plus_cut=false); 66 67 /** 68 * Copy constructor 69 */ 70 TaggerWord(const TaggerWord &w); 71 72 /** 73 * Destructor 74 */ 75 virtual ~TaggerWord(); 76 77 /** Set the superficial form of the word. 78 * @param s the superficial form 79 */ 80 void set_superficial_form(const wstring &s); 81 82 /** Get the superficial form of the word 83 * 84 */ 85 wstring& get_superficial_form(); 86 87 /** Add a new tag to the set of all possible tags of the word. 88 * @param t the coarse tag 89 * @param lf the lexical form (fine tag) 90 */ 91 virtual void add_tag(TTag &t, const wstring &lf, vector<wstring> const &prefer_rules); 92 93 /** Get the set of tags of this word. 94 * @return set of tags. 95 */ 96 virtual set<TTag>& get_tags(); 97 98 /** Get a wstring with the set of tags 99 */ 100 virtual wstring get_string_tags(); 101 102 /** Get the lexical form (fine tag) for a given tag (coarse one) 103 * @param t the tag 104 * @return the lexical form of tag t 105 */ 106 virtual wstring get_lexical_form(TTag &t, int const TAG_kEOF); 107 108 wstring get_all_chosen_tag_first(TTag &t, int const TAG_kEOF); 109 110 /** Get the lexical form (fine tag) for a given tag (coarse one) 111 * @param t the tag 112 * @return the lexical form of tag t without other text that 113 * is ignored. 114 */ 115 wstring get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF); 116 117 /** Add text to the ignored string 118 * 119 */ 120 void add_ignored_string(wstring const &s); 121 122 /** Set the flag plus_cut to a certain value. If this flag is set to true means 123 * that there were a '+' between this word and the next one 124 */ 125 void set_plus_cut(const bool &c); 126 127 /** 128 * Get and set the "show superficial form" flag 129 */ 130 void set_show_sf(bool sf); 131 bool get_show_sf(); 132 133 /** Get the value of the plus_cut flag */ 134 bool get_plus_cut(); 135 136 /** Output operator 137 */ 138 friend wostream& operator<< (wostream& os, TaggerWord &w); 139 140 static void setArrayTags(vector<wstring> const &at); 141 142 void print(); 143 144 void outputOriginal(FILE *output); 145 146 bool isAmbiguous() const; // CAUTION: unknown words are not considered to 147 // be ambiguous by this method 148 149 void discardOnAmbiguity(wstring const &tags); 150 }; 151 152 #endif 153