1 /**
2 * Yudit Unicode Editor Source File
3 *
4 * GNU Copyright (C) 1997-2006 Gaspar Sinai <gaspar@yudit.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2,
8 * dated June 1991. See file COPYYING for details.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include "stoolkit/sencoder/SB_Generic.h"
21 #include "stoolkit/SString.h"
22 #include "stoolkit/SStringVector.h"
23 #include "stoolkit/SEncoder.h"
24 #include "stoolkit/SCluster.h"
25
/**
 * This is a sample (base) implementation of the core encoding class.
 * @author: Gaspar Sinai <gaspar@yudit.org>
 * @version: 2000-05-12
 * E2 80 A8 and E2 80 A9 are the UTF-8 encodings of the line and
 * paragraph separators (U+2028, U+2029).
 */
SB_Generic(const SString & n)32 SB_Generic::SB_Generic(const SString& n) : SBEncoder ("\n,\r\n,\r"), map (n)
33 {
34 ok = map.isOK();
35 clustered = map.isClustered();
36 }
37
~SB_Generic()38 SB_Generic::~SB_Generic ()
39 {
40 }
41
42 /**
43 * return false if this generic encoder does not exist.
44 */
45 bool
isOK() const46 SB_Generic::isOK() const
47 {
48 return ok;
49 }
50
51 /**
52 * This is encoding a unicode string into a bytestring
53 * @param input is a unicode string.
54 */
55 const SString&
encode(const SV_UCS4 & input)56 SB_Generic::encode (const SV_UCS4& input)
57 {
58 return map.encode(input);
59 }
60
61 /**
62 * Decode an input string into a unicode string.
63 * @param input is a string.
64 * he output can be null, in this case a line is not
65 * read fully. If input size is zero output will be flushed.
66 */
67 const SV_UCS4&
decode(const SString & input)68 SB_Generic::decode (const SString& input)
69 {
70 /* if clustered, we let output out only if a cluster is finished */
71 if (!clustered) return map.decode(input);
72
73 ucs4string.clear();
74 const SV_UCS4& ret = map.decode(input);
75 if (ret.size()) remaining.append (ret);
76
77 /* clusterize it */
78 bool zeorinput = (input.size()==0);
79 while(remaining.size())
80 {
81 int finished; SV_UCS4 out;
82 unsigned int next = getCluster (remaining, 0, &out, &finished);
83 /* it *is* finished */
84 if (finished==0 && zeorinput) finished = 1;
85 if (!finished) break;
86 if (next==0) next = 1;
87 while (next)
88 {
89 ucs4string.append (remaining[0]);
90 remaining.remove(0);
91 next--;
92 }
93 }
94 if (zeorinput) map.reset(false);
95 return (ucs4string);
96 }
97
98 /**
99 * These methods guess the line delimiters for the input
100 * The one without arguments is giving the 'first approximation'
101 * It returns an inclusive list of all possibilities.
102 */
103 const SStringVector&
delimiters()104 SB_Generic::delimiters ()
105 {
106 return realDelimiters;
107 }
108
109 /**
110 * These methods guess the line delimiters for the input
111 * The one without arguments is giving the 'first approximation'
112 * It returns an exact list
113 */
114 const SStringVector&
delimiters(const SString & sample)115 SB_Generic::delimiters (const SString& sample)
116 {
117 return sampleDelimiters;
118 }
119
#if 0
/**
 * Disabled: this definition is compiled out by the surrounding #if 0.
 * @return the text in the decode buffer
 */
SString
SB_Generic::remainder() const
{
  /* Not clustered: the map's own remainder is returned as it is. */
  if (!clustered)
  {
    return SString(map.remainder());
  }
  /* Make a string from the UCS4 remainder, by applying the reverse map. */
  /* The encoding is done on a copy of the map, not on the member. */
  SUniMap m = map;
  SString ret = m.encode (remaining);
  /*
   * NOTE(review): encoding an empty vector presumably flushes what the
   * copied map still holds - confirm against SUniMap::encode.
   */
  SV_UCS4 empty;
  ret.append (m.encode(empty));
  /* Finally append the undecoded remainder held by the original map. */
  SString rem = map.remainder();
  //fprintf (stderr, "rem.size%u ucs4.size=%u string.size=%u\n",
  // remaining.size(), ret.size(), rem.size());
  ret.append (rem);
  return SString(ret);
}
#endif
143
144 /* for non-clustering it is remainder */
145 SString
preEditBuffer() const146 SB_Generic::preEditBuffer() const
147 {
148 if (!clustered)
149 {
150 return SString(map.remainder());
151 }
152 return SString(map.remainder()); // for a change :)
153 }
154
155 /* for clustering */
156 SV_UCS4
postEditBuffer() const157 SB_Generic::postEditBuffer () const
158 {
159 if (!clustered)
160 {
161 return SV_UCS4();
162 }
163 return SV_UCS4(remaining);
164 }
165
166 /**
167 * return key value map to see what decodes to what
168 * @param key will contain the keys
169 * @param value will contain the values
170 * @param _size is the maximum size of returned arrays
171 * @return the real size of the arrays.
172 */
173 unsigned int
getDecoderMap(SStringVector * key,SStringVector * value,unsigned int _size)174 SB_Generic::getDecoderMap (SStringVector* key, SStringVector* value,
175 unsigned int _size)
176 {
177 key->clear();
178 value->clear();
179 if (!map.isOK()) return 0;
180 return (map.getDecoderMap (key, value, _size));
181 }
182
183 /**
184 * Clear the map
185 */
186 void
clear()187 SB_Generic::clear()
188 {
189 remaining.clear();
190 map.reset();
191 }
192