1 /**
2  *  Yudit Unicode Editor Source File
3  *
4  *  GNU Copyright (C) 1997-2006  Gaspar Sinai <gaspar@yudit.org>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License, version 2,
 *  dated June 1991. See file COPYING for details.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 
20 #include "stoolkit/sencoder/SB_Generic.h"
21 #include "stoolkit/SString.h"
22 #include "stoolkit/SStringVector.h"
23 #include "stoolkit/SEncoder.h"
24 #include "stoolkit/SCluster.h"
25 
26 /**
27  * This is a sample (base) implementation of the core encoding class
28  * @author: Gaspar Sinai <gaspar@yudit.org>
29  * @version: 2000-05-12
 * E2 80 A8 and E2 80 A9 are the line and paragraph separators in utf-8
 * (U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR)
31  */
SB_Generic(const SString & n)32 SB_Generic::SB_Generic(const SString& n) : SBEncoder ("\n,\r\n,\r"), map (n)
33 {
34   ok = map.isOK();
35   clustered = map.isClustered();
36 }
37 
~SB_Generic()38 SB_Generic::~SB_Generic ()
39 {
40 }
41 
42 /**
43  * return false if this generic encoder does not exist.
44  */
45 bool
isOK() const46 SB_Generic::isOK() const
47 {
48   return ok;
49 }
50 
51 /**
52  * This is encoding a unicode string into a bytestring
53  * @param input is a unicode string.
54  */
55 const SString&
encode(const SV_UCS4 & input)56 SB_Generic::encode (const SV_UCS4& input)
57 {
58   return map.encode(input);
59 }
60 
61 /**
62  * Decode an input string into a unicode string.
63  * @param input is a string.
64  *   he output can be null, in this case a line is not
65  *   read fully. If input size is zero output will be flushed.
66  */
67 const SV_UCS4&
decode(const SString & input)68 SB_Generic::decode (const SString& input)
69 {
70   /* if clustered, we let output out only if a cluster is finished */
71   if (!clustered) return map.decode(input);
72 
73   ucs4string.clear();
74   const SV_UCS4& ret = map.decode(input);
75   if (ret.size()) remaining.append (ret);
76 
77   /* clusterize it */
78   bool zeorinput = (input.size()==0);
79   while(remaining.size())
80   {
81     int finished; SV_UCS4  out;
82     unsigned int next = getCluster (remaining, 0, &out, &finished);
83     /* it *is* finished */
84     if (finished==0 && zeorinput) finished = 1;
85     if (!finished) break;
86     if (next==0) next = 1;
87     while (next)
88     {
89       ucs4string.append (remaining[0]);
90       remaining.remove(0);
91       next--;
92     }
93   }
94   if (zeorinput) map.reset(false);
95   return (ucs4string);
96 }
97 
98 /**
99  * These methods guess the line delimiters for the input
100  * The one without arguments is giving the 'first approximation'
101  * It returns an inclusive list of all possibilities.
102  */
103 const SStringVector&
delimiters()104 SB_Generic::delimiters ()
105 {
106   return realDelimiters;
107 }
108 
109 /**
110  * These methods guess the line delimiters for the input
111  * The one without arguments is giving the 'first approximation'
112  * It returns an exact list
113  */
114 const SStringVector&
delimiters(const SString & sample)115 SB_Generic::delimiters (const SString& sample)
116 {
117   return sampleDelimiters;
118 }
119 
120 #if 0
121 /**
122  * @return the text in the decode buffer
123  */
124 SString
125 SB_Generic::remainder() const
126 {
127   if (!clustered)
128   {
129     return SString(map.remainder());
130   }
131   /* Make a string from UCS4 remainder, by appliying reverse map. */
132   SUniMap m = map;
133   SString ret = m.encode (remaining);
134   SV_UCS4 empty;
135   ret.append (m.encode(empty));
136   SString rem = map.remainder();
137   //fprintf (stderr, "rem.size%u ucs4.size=%u string.size=%u\n",
138   //    remaining.size(), ret.size(), rem.size());
139   ret.append (rem);
140   return SString(ret);
141 }
142 #endif
143 
144 /* for non-clustering it is remainder */
145 SString
preEditBuffer() const146 SB_Generic::preEditBuffer() const
147 {
148   if (!clustered)
149   {
150     return SString(map.remainder());
151   }
152   return SString(map.remainder()); // for a change :)
153 }
154 
155 /* for clustering */
156 SV_UCS4
postEditBuffer() const157 SB_Generic::postEditBuffer () const
158 {
159   if (!clustered)
160   {
161      return SV_UCS4();
162   }
163   return SV_UCS4(remaining);
164 }
165 
166 /**
167  * return key value map to see what decodes to what
168  * @param key will contain the keys
169  * @param value will contain the values
170  * @param _size is the maximum size of returned arrays
171  * @return the real size of the arrays.
172  */
173 unsigned int
getDecoderMap(SStringVector * key,SStringVector * value,unsigned int _size)174 SB_Generic::getDecoderMap (SStringVector* key, SStringVector* value,
175         unsigned int _size)
176 {
177   key->clear();
178   value->clear();
179   if (!map.isOK()) return 0;
180   return (map.getDecoderMap (key, value, _size));
181 }
182 
183 /**
184  * Clear the map
185  */
186 void
clear()187 SB_Generic::clear()
188 {
189   remaining.clear();
190   map.reset();
191 }
192